/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#ifdef DEBUG
#define	XNB_DEBUG 1
#endif /* DEBUG */

#include "xnb.h"

#include <sys/sunddi.h>
#include <sys/sunndi.h>
#include <sys/modctl.h>
#include <sys/conf.h>
#include <sys/mac.h>
#include <sys/mac_impl.h> /* For mac_fix_cksum(). */
#include <sys/dlpi.h>
#include <sys/strsubr.h>
#include <sys/strsun.h>
#include <sys/types.h>
#include <sys/pattr.h>
#include <vm/seg_kmem.h>
#include <vm/hat_i86.h>
#include <xen/sys/xenbus_impl.h>
#include <xen/sys/xendev.h>
#include <sys/balloon_impl.h>
#include <sys/evtchn_impl.h>
#include <sys/gnttab.h>
#include <vm/vm_dep.h>
#include <sys/note.h>
#include <sys/gld.h>
#include <inet/ip.h>
#include <inet/ip_impl.h>

/*
 * The terms "transmit" and "receive" are used in alignment with domU,
 * which means that packets originating from the peer domU are "transmitted"
 * to other parts of the system and packets are "received" from them.
 */

/*
 * Should we allow guests to manipulate multicast group membership?
 */
static boolean_t	xnb_multicast_control = B_TRUE;

static boolean_t	xnb_connect_rings(dev_info_t *);
static void	xnb_disconnect_rings(dev_info_t *);
static void	xnb_oe_state_change(dev_info_t *, ddi_eventcookie_t,
    void *, void *);
static void	xnb_hp_state_change(dev_info_t *, ddi_eventcookie_t,
    void *, void *);

static int	xnb_txbuf_constructor(void *, void *, int);
static void	xnb_txbuf_destructor(void *, void *);
static void	xnb_tx_notify_peer(xnb_t *, boolean_t);
static void	xnb_tx_mark_complete(xnb_t *, RING_IDX, int16_t);

mblk_t *xnb_to_peer(xnb_t *, mblk_t *);
mblk_t *xnb_copy_to_peer(xnb_t *, mblk_t *);

static void setup_gop(xnb_t *, gnttab_copy_t *, uchar_t *,
    size_t, size_t, size_t, grant_ref_t);
#pragma inline(setup_gop)
static boolean_t is_foreign(void *);
#pragma inline(is_foreign)

#define	INVALID_GRANT_HANDLE	((grant_handle_t)-1)
#define	INVALID_GRANT_REF	((grant_ref_t)-1)

static kmutex_t	xnb_alloc_page_lock;

/*
 * On a 32 bit PAE system physical and machine addresses are larger
 * than 32 bits.  ddi_btop() on such systems takes an unsigned long
 * argument, and so addresses above 4G are truncated before ddi_btop()
 * gets to see them.  To avoid this, code the shift operation here.
 */
#define	xnb_btop(addr)	((addr) >> PAGESHIFT)

/* DMA attributes for transmit and receive data */
static ddi_dma_attr_t buf_dma_attr = {
	DMA_ATTR_V0,		/* version of this structure */
	0,			/* lowest usable address */
	0xffffffffffffffffULL,	/* highest usable address */
	0x7fffffff,		/* maximum DMAable byte count */
	MMU_PAGESIZE,		/* alignment in bytes */
	0x7ff,			/* bitmap of burst sizes */
	1,			/* minimum transfer */
	0xffffffffU,		/* maximum transfer */
	0xffffffffffffffffULL,	/* maximum segment length */
	1,			/* maximum number of segments */
	1,			/* granularity */
	0,			/* flags (reserved) */
};

/* DMA access attributes for data: NOT to be byte swapped. */
static ddi_device_acc_attr_t data_accattr = {
	DDI_DEVICE_ATTR_V0,
	DDI_NEVERSWAP_ACC,
	DDI_STRICTORDER_ACC
};

/*
 * Statistics.
 */
static const char * const aux_statistics[] = {
	"rx_cksum_deferred",
	"tx_cksum_no_need",
	"rx_rsp_notok",
	"tx_notify_deferred",
	"tx_notify_sent",
	"rx_notify_deferred",
	"rx_notify_sent",
	"tx_too_early",
	"rx_too_early",
	"rx_allocb_failed",
	"tx_allocb_failed",
	"rx_foreign_page",
	"mac_full",
	"spurious_intr",
	"allocation_success",
	"allocation_failure",
	"small_allocation_success",
	"small_allocation_failure",
	"other_allocation_failure",
	"rx_pageboundary_crossed",
	"rx_cpoparea_grown",
	"csum_hardware",
	"csum_software",
	"tx_overflow_page",
	"tx_unexpected_flags",
};

static int
xnb_ks_aux_update(kstat_t *ksp, int flag)
{
	xnb_t *xnbp;
	kstat_named_t *knp;

	if (flag != KSTAT_READ)
		return (EACCES);

	xnbp = ksp->ks_private;
	knp = ksp->ks_data;

	/*
	 * Assignment order should match that of the names in
	 * aux_statistics.
	 */
	(knp++)->value.ui64 = xnbp->xnb_stat_rx_cksum_deferred;
	(knp++)->value.ui64 = xnbp->xnb_stat_tx_cksum_no_need;
	(knp++)->value.ui64 = xnbp->xnb_stat_rx_rsp_notok;
	(knp++)->value.ui64 = xnbp->xnb_stat_tx_notify_deferred;
	(knp++)->value.ui64 = xnbp->xnb_stat_tx_notify_sent;
	(knp++)->value.ui64 = xnbp->xnb_stat_rx_notify_deferred;
	(knp++)->value.ui64 = xnbp->xnb_stat_rx_notify_sent;
	(knp++)->value.ui64 = xnbp->xnb_stat_tx_too_early;
	(knp++)->value.ui64 = xnbp->xnb_stat_rx_too_early;
	(knp++)->value.ui64 = xnbp->xnb_stat_rx_allocb_failed;
	(knp++)->value.ui64 = xnbp->xnb_stat_tx_allocb_failed;
	(knp++)->value.ui64 = xnbp->xnb_stat_rx_foreign_page;
	(knp++)->value.ui64 = xnbp->xnb_stat_mac_full;
	(knp++)->value.ui64 = xnbp->xnb_stat_spurious_intr;
	(knp++)->value.ui64 = xnbp->xnb_stat_allocation_success;
	(knp++)->value.ui64 = xnbp->xnb_stat_allocation_failure;
	(knp++)->value.ui64 = xnbp->xnb_stat_small_allocation_success;
	(knp++)->value.ui64 = xnbp->xnb_stat_small_allocation_failure;
	(knp++)->value.ui64 = xnbp->xnb_stat_other_allocation_failure;
	(knp++)->value.ui64 = xnbp->xnb_stat_rx_pagebndry_crossed;
	(knp++)->value.ui64 = xnbp->xnb_stat_rx_cpoparea_grown;
	(knp++)->value.ui64 = xnbp->xnb_stat_csum_hardware;
	(knp++)->value.ui64 = xnbp->xnb_stat_csum_software;
	(knp++)->value.ui64 = xnbp->xnb_stat_tx_overflow_page;
	(knp++)->value.ui64 = xnbp->xnb_stat_tx_unexpected_flags;

	return (0);
}

static boolean_t
xnb_ks_init(xnb_t *xnbp)
{
	int nstat = sizeof (aux_statistics) /
	    sizeof (aux_statistics[0]);
	const char * const *cp = aux_statistics;
	kstat_named_t *knp;

	/*
	 * Create and initialise kstats.
	 */
	xnbp->xnb_kstat_aux = kstat_create(ddi_driver_name(xnbp->xnb_devinfo),
	    ddi_get_instance(xnbp->xnb_devinfo), "aux_statistics", "net",
	    KSTAT_TYPE_NAMED, nstat, 0);
	if (xnbp->xnb_kstat_aux == NULL)
		return (B_FALSE);

	xnbp->xnb_kstat_aux->ks_private = xnbp;
	xnbp->xnb_kstat_aux->ks_update = xnb_ks_aux_update;

	knp = xnbp->xnb_kstat_aux->ks_data;
	while (nstat > 0) {
		kstat_named_init(knp, *cp, KSTAT_DATA_UINT64);

		knp++;
		cp++;
		nstat--;
	}

	kstat_install(xnbp->xnb_kstat_aux);

	return (B_TRUE);
}

static void
xnb_ks_free(xnb_t *xnbp)
{
	kstat_delete(xnbp->xnb_kstat_aux);
}

/*
 * Calculate and insert the transport checksum for an arbitrary packet.
 */
static mblk_t *
xnb_software_csum(xnb_t *xnbp, mblk_t *mp)
{
	_NOTE(ARGUNUSED(xnbp));

	/*
	 * XXPV dme: shouldn't rely on mac_fix_cksum(), not least
	 * because it doesn't cover all of the interesting cases :-(
	 */
	(void) hcksum_assoc(mp, NULL, NULL, 0, 0, 0, 0,
	    HCK_FULLCKSUM, KM_NOSLEEP);

	return (mac_fix_cksum(mp));
}
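
/*
 * Examine the checksum requirements of a packet bound for the peer and,
 * where the MAC's hardware capabilities (capab) allow, attach the
 * appropriate hardware checksum attributes; otherwise fall back to
 * calculating the checksum in software via xnb_software_csum().
 */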
mblk_t *
xnb_process_cksum_flags(xnb_t *xnbp, mblk_t *mp, uint32_t capab)
{
	struct ether_header *ehp;
	uint16_t sap;
	uint32_t offset;
	ipha_t *ipha;

	ASSERT(mp->b_next == NULL);

	/*
	 * Check that the packet is contained in a single mblk.  In
	 * the "from peer" path this is true today, but may change
	 * when scatter gather support is added.  In the "to peer"
	 * path we cannot be sure, but in most cases it will be true
	 * (in the xnbo case the packet has come from a MAC device
	 * which is unlikely to split packets).
	 */
	if (mp->b_cont != NULL)
		goto software;

	/*
	 * If the MAC has no hardware capability don't do any further
	 * checking.
	 */
	if (capab == 0)
		goto software;

	ASSERT(MBLKL(mp) >= sizeof (struct ether_header));
	ehp = (struct ether_header *)mp->b_rptr;

	if (ntohs(ehp->ether_type) == VLAN_TPID) {
		struct ether_vlan_header *evhp;

		ASSERT(MBLKL(mp) >= sizeof (struct ether_vlan_header));
		evhp = (struct ether_vlan_header *)mp->b_rptr;
		sap = ntohs(evhp->ether_type);
		offset = sizeof (struct ether_vlan_header);
	} else {
		sap = ntohs(ehp->ether_type);
		offset = sizeof (struct ether_header);
	}

	/*
	 * We only attempt to do IPv4 packets in hardware.
	 */
	if (sap != ETHERTYPE_IP)
		goto software;

	/*
	 * We know that this is an IPv4 packet.
	 */
	ipha = (ipha_t *)(mp->b_rptr + offset);

	switch (ipha->ipha_protocol) {
	case IPPROTO_TCP:
	case IPPROTO_UDP: {
		uint32_t start, length, stuff, cksum;
		uint16_t *stuffp;

		/*
		 * This is a TCP/IPv4 or UDP/IPv4 packet, for which we
		 * can use either full or partial checksum offload.
		 */
		if ((capab & (HCKSUM_INET_FULL_V4|HCKSUM_INET_PARTIAL)) == 0)
			break;

		start = IP_SIMPLE_HDR_LENGTH;
		length = ntohs(ipha->ipha_length);
		if (ipha->ipha_protocol == IPPROTO_TCP) {
			stuff = start + TCP_CHECKSUM_OFFSET;
			cksum = IP_TCP_CSUM_COMP;
		} else {
			stuff = start + UDP_CHECKSUM_OFFSET;
			cksum = IP_UDP_CSUM_COMP;
		}
		stuffp = (uint16_t *)(mp->b_rptr + offset + stuff);

		if (capab & HCKSUM_INET_FULL_V4) {
			/*
			 * Some devices require that the checksum
			 * field of the packet is zero for full
			 * offload.
			 */
			*stuffp = 0;

			(void) hcksum_assoc(mp, NULL, NULL,
			    0, 0, 0, 0,
			    HCK_FULLCKSUM, KM_NOSLEEP);

			xnbp->xnb_stat_csum_hardware++;

			return (mp);
		}

		if (capab & HCKSUM_INET_PARTIAL) {
			if (*stuffp == 0) {
				ipaddr_t src, dst;

				/*
				 * Older Solaris guests don't insert
				 * the pseudo-header checksum, so we
				 * calculate it here.  cksum already
				 * holds the protocol's contribution;
				 * fold in the addresses and the ULP
				 * length, then reduce to 16 bits.
				 */
				src = ipha->ipha_src;
				dst = ipha->ipha_dst;

				cksum += (dst >> 16) + (dst & 0xFFFF);
				cksum += (src >> 16) + (src & 0xFFFF);
				cksum += length - IP_SIMPLE_HDR_LENGTH;

				cksum = (cksum >> 16) + (cksum & 0xFFFF);
				cksum = (cksum >> 16) + (cksum & 0xFFFF);

				ASSERT(cksum <= 0xFFFF);

				*stuffp = (uint16_t)(cksum ? cksum : ~cksum);
			}

			(void) hcksum_assoc(mp, NULL, NULL,
			    start, stuff, length, 0,
			    HCK_PARTIALCKSUM, KM_NOSLEEP);

			xnbp->xnb_stat_csum_hardware++;

			return (mp);
		}

		/* NOTREACHED */
		break;
	}

	default:
		/* Use software. */
		break;
	}

software:
	/*
	 * We are not able to use any offload so do the whole thing in
	 * software.
	 */
	xnbp->xnb_stat_csum_software++;

	return (xnb_software_csum(xnbp, mp));
}
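
/*
 * Common attach processing for an xnb instance: allocate and initialise
 * the per-instance state, create the kstats and the transmit buffer
 * cache, register for xenstore state and hotplug events and advertise
 * our features to the peer via xenstore.
 */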
int
xnb_attach(dev_info_t *dip, xnb_flavour_t *flavour, void *flavour_data)
{
	xnb_t *xnbp;
	char *xsname;
	char cachename[32];

	xnbp = kmem_zalloc(sizeof (*xnbp), KM_SLEEP);

	xnbp->xnb_flavour = flavour;
	xnbp->xnb_flavour_data = flavour_data;
	xnbp->xnb_devinfo = dip;
	xnbp->xnb_evtchn = INVALID_EVTCHN;
	xnbp->xnb_irq = B_FALSE;
	xnbp->xnb_tx_ring_handle = INVALID_GRANT_HANDLE;
	xnbp->xnb_rx_ring_handle = INVALID_GRANT_HANDLE;
	xnbp->xnb_connected = B_FALSE;
	xnbp->xnb_hotplugged = B_FALSE;
	xnbp->xnb_detachable = B_FALSE;
	xnbp->xnb_peer = xvdi_get_oeid(dip);
	xnbp->xnb_be_status = XNB_STATE_INIT;
	xnbp->xnb_fe_status = XNB_STATE_INIT;

	xnbp->xnb_tx_buf_count = 0;

	xnbp->xnb_rx_hv_copy = B_FALSE;
	xnbp->xnb_multicast_control = B_FALSE;

	xnbp->xnb_rx_va = vmem_alloc(heap_arena, PAGESIZE, VM_SLEEP);
	ASSERT(xnbp->xnb_rx_va != NULL);

	if (ddi_get_iblock_cookie(dip, 0, &xnbp->xnb_icookie)
	    != DDI_SUCCESS)
		goto failure;

	/* Allocated on demand, when/if we enter xnb_copy_to_peer(). */
	xnbp->xnb_rx_cpop = NULL;
	xnbp->xnb_rx_cpop_count = 0;

	mutex_init(&xnbp->xnb_tx_lock, NULL, MUTEX_DRIVER,
	    xnbp->xnb_icookie);
	mutex_init(&xnbp->xnb_rx_lock, NULL, MUTEX_DRIVER,
	    xnbp->xnb_icookie);
	mutex_init(&xnbp->xnb_state_lock, NULL, MUTEX_DRIVER,
	    xnbp->xnb_icookie);

	/* Set driver private pointer now. */
	ddi_set_driver_private(dip, xnbp);

	(void) sprintf(cachename, "xnb_tx_buf_cache_%d", ddi_get_instance(dip));
	xnbp->xnb_tx_buf_cache = kmem_cache_create(cachename,
	    sizeof (xnb_txbuf_t), 0,
	    xnb_txbuf_constructor, xnb_txbuf_destructor,
	    NULL, xnbp, NULL, 0);
	if (xnbp->xnb_tx_buf_cache == NULL)
		goto failure_0;

	if (!xnb_ks_init(xnbp))
		goto failure_1;

	/*
	 * Receive notification of changes in the state of the
	 * driver in the guest domain.
	 */
	if (xvdi_add_event_handler(dip, XS_OE_STATE, xnb_oe_state_change,
	    NULL) != DDI_SUCCESS)
		goto failure_2;

	/*
	 * Receive notification of hotplug events.
	 */
	if (xvdi_add_event_handler(dip, XS_HP_STATE, xnb_hp_state_change,
	    NULL) != DDI_SUCCESS)
		goto failure_2;

	xsname = xvdi_get_xsname(dip);

	if (xenbus_printf(XBT_NULL, xsname,
	    "feature-multicast-control", "%d",
	    xnb_multicast_control ? 1 : 0) != 0)
		goto failure_3;

	if (xenbus_printf(XBT_NULL, xsname,
	    "feature-rx-copy", "%d", 1) != 0)
		goto failure_3;
	/*
	 * Linux domUs seem to depend on "feature-rx-flip" being 0
	 * in addition to "feature-rx-copy" being 1.  It seems strange
	 * to use four possible states to describe a binary decision,
	 * but we might as well play nice.
	 */
	if (xenbus_printf(XBT_NULL, xsname,
	    "feature-rx-flip", "%d", 0) != 0)
		goto failure_3;

	(void) xvdi_switch_state(dip, XBT_NULL, XenbusStateInitWait);
	(void) xvdi_post_event(dip, XEN_HP_ADD);

	return (DDI_SUCCESS);

failure_3:
	xvdi_remove_event_handler(dip, NULL);

failure_2:
	xnb_ks_free(xnbp);

failure_1:
	kmem_cache_destroy(xnbp->xnb_tx_buf_cache);

failure_0:
	mutex_destroy(&xnbp->xnb_state_lock);
	mutex_destroy(&xnbp->xnb_rx_lock);
	mutex_destroy(&xnbp->xnb_tx_lock);

failure:
	vmem_free(heap_arena, xnbp->xnb_rx_va, PAGESIZE);
	kmem_free(xnbp, sizeof (*xnbp));
	return (DDI_FAILURE);
}

void
xnb_detach(dev_info_t *dip)
{
	xnb_t *xnbp = ddi_get_driver_private(dip);

	ASSERT(xnbp != NULL);
	ASSERT(!xnbp->xnb_connected);
	ASSERT(xnbp->xnb_tx_buf_count == 0);

	xnb_disconnect_rings(dip);

	xvdi_remove_event_handler(dip, NULL);

	xnb_ks_free(xnbp);

	kmem_cache_destroy(xnbp->xnb_tx_buf_cache);

	ddi_set_driver_private(dip, NULL);

	mutex_destroy(&xnbp->xnb_state_lock);
	mutex_destroy(&xnbp->xnb_rx_lock);
	mutex_destroy(&xnbp->xnb_tx_lock);

	if (xnbp->xnb_rx_cpop_count > 0)
		kmem_free(xnbp->xnb_rx_cpop, sizeof (xnbp->xnb_rx_cpop[0])
		    * xnbp->xnb_rx_cpop_count);

	ASSERT(xnbp->xnb_rx_va != NULL);
	vmem_free(heap_arena, xnbp->xnb_rx_va, PAGESIZE);

	kmem_free(xnbp, sizeof (*xnbp));
}

/*
 * Allocate a page from the hypervisor to be flipped to the peer.
 *
 * Try to get pages in batches to reduce the overhead of calls into
 * the balloon driver.
 */
static mfn_t
xnb_alloc_page(xnb_t *xnbp)
{
#define	WARNING_RATE_LIMIT 100
#define	BATCH_SIZE 256
	static mfn_t mfns[BATCH_SIZE];	/* common across all instances */
	static int nth = BATCH_SIZE;
	mfn_t mfn;

	mutex_enter(&xnb_alloc_page_lock);
	if (nth == BATCH_SIZE) {
		if (balloon_alloc_pages(BATCH_SIZE, mfns) != BATCH_SIZE) {
			xnbp->xnb_stat_allocation_failure++;
			mutex_exit(&xnb_alloc_page_lock);

			/*
			 * Try for a single page in low memory situations.
			 */
			if (balloon_alloc_pages(1, &mfn) != 1) {
				if ((xnbp->xnb_stat_small_allocation_failure++
				    % WARNING_RATE_LIMIT) == 0)
					cmn_err(CE_WARN, "xnb_alloc_page: "
					    "Cannot allocate memory to "
					    "transfer packets to peer.");
				return (0);
			} else {
				xnbp->xnb_stat_small_allocation_success++;
				return (mfn);
			}
		}

		nth = 0;
		xnbp->xnb_stat_allocation_success++;
	}

	mfn = mfns[nth++];
	mutex_exit(&xnb_alloc_page_lock);

	ASSERT(mfn != 0);

	return (mfn);
#undef BATCH_SIZE
#undef WARNING_RATE_LIMIT
}

/*
 * Free a page back to the hypervisor.
 *
 * This happens only in the error path, so batching is not worth the
 * complication.
 */
static void
xnb_free_page(xnb_t *xnbp, mfn_t mfn)
{
	_NOTE(ARGUNUSED(xnbp));
	int r;
	pfn_t pfn;

	pfn = xen_assign_pfn(mfn);
	pfnzero(pfn, 0, PAGESIZE);
	xen_release_pfn(pfn);

	if ((r = balloon_free_pages(1, &mfn, NULL, NULL)) != 1) {
		cmn_err(CE_WARN, "free_page: cannot decrease memory "
		    "reservation (%d): page kept but unusable (mfn = 0x%lx).",
		    r, mfn);
	}
}

/*
 * Similar to RING_HAS_UNCONSUMED_REQUESTS(&xnbp->rx_ring) but using
 * local variables.  Used in both xnb_to_peer() and xnb_copy_to_peer().
 */
#define	XNB_RING_HAS_UNCONSUMED_REQUESTS(_r)		\
	((((_r)->sring->req_prod - loop) <		\
	    (RING_SIZE(_r) - (loop - prod))) ?		\
	    ((_r)->sring->req_prod - loop) :		\
	    (RING_SIZE(_r) - (loop - prod)))

/*
 * Pass packets to the peer using page flipping.
 */
mblk_t *
xnb_to_peer(xnb_t *xnbp, mblk_t *mp)
{
	mblk_t *free = mp, *prev = NULL;
	size_t len;
	gnttab_transfer_t *gop;
	boolean_t notify;
	RING_IDX loop, prod, end;

	/*
	 * For each packet the sequence of operations is:
	 *
	 * 1. get a new page from the hypervisor.
	 * 2. get a request slot from the ring.
	 * 3. copy the data into the new page.
	 * 4. transfer the page to the peer.
	 * 5. update the request slot.
	 * 6. kick the peer.
	 * 7. free mp.
	 *
	 * In order to reduce the number of hypercalls, we prepare
	 * several packets for the peer and perform a single hypercall
	 * to transfer them.
	 */

	mutex_enter(&xnbp->xnb_rx_lock);

	/*
	 * If we are not connected to the peer or have not yet
	 * finished hotplug it is too early to pass packets to the
	 * peer.
	 */
	if (!(xnbp->xnb_connected && xnbp->xnb_hotplugged)) {
		mutex_exit(&xnbp->xnb_rx_lock);
		DTRACE_PROBE(flip_rx_too_early);
		xnbp->xnb_stat_rx_too_early++;
		return (mp);
	}

	loop = xnbp->xnb_rx_ring.req_cons;
	prod = xnbp->xnb_rx_ring.rsp_prod_pvt;
	gop = xnbp->xnb_rx_top;

	while ((mp != NULL) &&
	    XNB_RING_HAS_UNCONSUMED_REQUESTS(&xnbp->xnb_rx_ring)) {

		mfn_t mfn;
		pfn_t pfn;
		netif_rx_request_t *rxreq;
		netif_rx_response_t *rxresp;
		char *valoop;
		mblk_t *ml;
		uint16_t cksum_flags;

		/* 1 */
		if ((mfn = xnb_alloc_page(xnbp)) == 0) {
			xnbp->xnb_stat_rx_defer++;
			break;
		}

		/* 2 */
		rxreq = RING_GET_REQUEST(&xnbp->xnb_rx_ring, loop);

#ifdef XNB_DEBUG
		if (!(rxreq->id < NET_RX_RING_SIZE))
			cmn_err(CE_PANIC, "xnb_to_peer: "
			    "id %d out of range in request 0x%p",
			    rxreq->id, (void *)rxreq);
#endif /* XNB_DEBUG */

		/* Assign a pfn and map the new page at the allocated va. */
		pfn = xen_assign_pfn(mfn);
		hat_devload(kas.a_hat, xnbp->xnb_rx_va, PAGESIZE,
		    pfn, PROT_READ | PROT_WRITE, HAT_LOAD);

		/* 3 */
		len = 0;
		valoop = xnbp->xnb_rx_va;
		for (ml = mp; ml != NULL; ml = ml->b_cont) {
			size_t chunk = ml->b_wptr - ml->b_rptr;

			bcopy(ml->b_rptr, valoop, chunk);
			valoop += chunk;
			len += chunk;
		}

		ASSERT(len < PAGESIZE);

		/* Release the pfn. */
		hat_unload(kas.a_hat, xnbp->xnb_rx_va, PAGESIZE,
		    HAT_UNLOAD_UNMAP);
		xen_release_pfn(pfn);

		/* 4 */
		gop->mfn = mfn;
		gop->domid = xnbp->xnb_peer;
		gop->ref = rxreq->gref;

		/* 5.1 */
		rxresp = RING_GET_RESPONSE(&xnbp->xnb_rx_ring, prod);
		rxresp->offset = 0;
		rxresp->flags = 0;

		cksum_flags = xnbp->xnb_flavour->xf_cksum_to_peer(xnbp, mp);
		if (cksum_flags != 0)
			xnbp->xnb_stat_rx_cksum_deferred++;
		rxresp->flags |= cksum_flags;

		rxresp->id = RING_GET_REQUEST(&xnbp->xnb_rx_ring, prod)->id;
		rxresp->status = len;

		loop++;
		prod++;
		gop++;
		prev = mp;
		mp = mp->b_next;
	}

	/*
	 * Did we actually do anything?
	 */
	if (loop == xnbp->xnb_rx_ring.req_cons) {
		mutex_exit(&xnbp->xnb_rx_lock);
		return (mp);
	}

	end = loop;

	/*
	 * Unlink the end of the 'done' list from the remainder.
	 */
	ASSERT(prev != NULL);
	prev->b_next = NULL;

	if (HYPERVISOR_grant_table_op(GNTTABOP_transfer, xnbp->xnb_rx_top,
	    loop - xnbp->xnb_rx_ring.req_cons) != 0) {
		cmn_err(CE_WARN, "xnb_to_peer: transfer operation failed");
	}

	loop = xnbp->xnb_rx_ring.req_cons;
	prod = xnbp->xnb_rx_ring.rsp_prod_pvt;
	gop = xnbp->xnb_rx_top;

	while (loop < end) {
		int16_t status = NETIF_RSP_OKAY;

		if (gop->status != 0) {
			status = NETIF_RSP_ERROR;

			/*
			 * If the status is anything other than
			 * GNTST_bad_page then we don't own the page
			 * any more, so don't try to give it back.
			 */
			if (gop->status != GNTST_bad_page)
				gop->mfn = 0;
		} else {
			/* The page is no longer ours. */
			gop->mfn = 0;
		}

		if (gop->mfn != 0)
			/*
			 * Give back the page, as we won't be using
			 * it.
			 */
			xnb_free_page(xnbp, gop->mfn);
		else
			/*
			 * We gave away a page, update our accounting
			 * now.
			 */
			balloon_drv_subtracted(1);

		/* 5.2 */
		if (status != NETIF_RSP_OKAY) {
			RING_GET_RESPONSE(&xnbp->xnb_rx_ring, prod)->status =
			    status;
		} else {
			xnbp->xnb_stat_ipackets++;
			xnbp->xnb_stat_rbytes += len;
		}

		loop++;
		prod++;
		gop++;
	}

	xnbp->xnb_rx_ring.req_cons = loop;
	xnbp->xnb_rx_ring.rsp_prod_pvt = prod;

	/* 6 */
	/* LINTED: constant in conditional context */
	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&xnbp->xnb_rx_ring, notify);
	if (notify) {
		ec_notify_via_evtchn(xnbp->xnb_evtchn);
		xnbp->xnb_stat_rx_notify_sent++;
	} else {
		xnbp->xnb_stat_rx_notify_deferred++;
	}

	if (mp != NULL)
		xnbp->xnb_stat_rx_defer++;

	mutex_exit(&xnbp->xnb_rx_lock);

	/* Free mblk_t's that we consumed. */
	freemsgchain(free);

	return (mp);
}

/* Helper functions for xnb_copy_to_peer(). */

/*
 * Grow the array of copy operation descriptors.
 */
static boolean_t
grow_cpop_area(xnb_t *xnbp)
{
	size_t count;
	gnttab_copy_t *new;

	ASSERT(MUTEX_HELD(&xnbp->xnb_rx_lock));

	count = xnbp->xnb_rx_cpop_count + CPOP_DEFCNT;

	if ((new = kmem_alloc(sizeof (new[0]) * count, KM_NOSLEEP)) == NULL) {
		xnbp->xnb_stat_other_allocation_failure++;
		return (B_FALSE);
	}

	bcopy(xnbp->xnb_rx_cpop, new,
	    sizeof (xnbp->xnb_rx_cpop[0]) * xnbp->xnb_rx_cpop_count);

	kmem_free(xnbp->xnb_rx_cpop,
	    sizeof (xnbp->xnb_rx_cpop[0]) * xnbp->xnb_rx_cpop_count);

	xnbp->xnb_rx_cpop = new;
	xnbp->xnb_rx_cpop_count = count;

	xnbp->xnb_stat_rx_cpoparea_grown++;

	return (B_TRUE);
}

/*
 * Check whether an address is on a page that's foreign to this domain.
 */
static boolean_t
is_foreign(void *addr)
{
	pfn_t pfn = hat_getpfnum(kas.a_hat, addr);

	return ((pfn & PFN_IS_FOREIGN_MFN) == PFN_IS_FOREIGN_MFN);
}

/*
 * Insert a newly allocated mblk into a chain, replacing the old one.
 */
static mblk_t *
replace_msg(mblk_t *mp, size_t len, mblk_t *mp_prev, mblk_t *ml_prev)
{
	uint32_t start, stuff, end, value, flags;
	mblk_t *new_mp;

	new_mp = copyb(mp);
	if (new_mp == NULL)
		cmn_err(CE_PANIC, "replace_msg: cannot alloc new message "
		    "for %p, len %lu", (void *)mp, len);

	hcksum_retrieve(mp, NULL, NULL, &start, &stuff, &end, &value, &flags);
	(void) hcksum_assoc(new_mp, NULL, NULL, start, stuff, end, value,
	    flags, KM_NOSLEEP);

	new_mp->b_next = mp->b_next;
	new_mp->b_prev = mp->b_prev;
	new_mp->b_cont = mp->b_cont;

	/* Make sure we only overwrite pointers to the mblk being replaced. */
	if (mp_prev != NULL && mp_prev->b_next == mp)
		mp_prev->b_next = new_mp;

	if (ml_prev != NULL && ml_prev->b_cont == mp)
		ml_prev->b_cont = new_mp;

	mp->b_next = mp->b_prev = mp->b_cont = NULL;
	freemsg(mp);

	return (new_mp);
}

/*
 * Set all the fields in a gnttab_copy_t.
 */
static void
setup_gop(xnb_t *xnbp, gnttab_copy_t *gp, uchar_t *rptr,
    size_t s_off, size_t d_off, size_t len, grant_ref_t d_ref)
{
	ASSERT(xnbp != NULL && gp != NULL);

	gp->source.offset = s_off;
	gp->source.u.gmfn = pfn_to_mfn(hat_getpfnum(kas.a_hat, (caddr_t)rptr));
	gp->source.domid = DOMID_SELF;

	gp->len = (uint16_t)len;
	gp->flags = GNTCOPY_dest_gref;
	gp->status = 0;

	gp->dest.u.ref = d_ref;
	gp->dest.offset = d_off;
	gp->dest.domid = xnbp->xnb_peer;
}

/*
 * Pass packets to the peer using hypervisor copy operations.
 */
mblk_t *
xnb_copy_to_peer(xnb_t *xnbp, mblk_t *mp)
{
	mblk_t *free = mp, *mp_prev = NULL, *saved_mp = mp;
	mblk_t *ml, *ml_prev;
	boolean_t notify;
	RING_IDX loop, prod;
	int i;

	/*
	 * If the peer does not pre-post buffers for received packets,
	 * use page flipping to pass packets to it.
	 */
	if (!xnbp->xnb_rx_hv_copy)
		return (xnb_to_peer(xnbp, mp));

	/*
	 * For each packet the sequence of operations is:
	 *
	 * 1. get a request slot from the ring.
	 * 2. set up data for hypercall (see NOTE below)
	 * 3. have the hypervisor copy the data
	 * 4. update the request slot.
	 * 5. kick the peer.
	 *
	 * NOTE ad 2.
	 * In order to reduce the number of hypercalls, we prepare
	 * several mblks (mp->b_cont != NULL) for the peer and
	 * perform a single hypercall to transfer them.  We also have
	 * to set up a separate copy operation for every page.
	 *
	 * If we have more than one packet (mp->b_next != NULL), we do
	 * this whole dance repeatedly.
	 */

	mutex_enter(&xnbp->xnb_rx_lock);

	if (!(xnbp->xnb_connected && xnbp->xnb_hotplugged)) {
		mutex_exit(&xnbp->xnb_rx_lock);
		DTRACE_PROBE(copy_rx_too_early);
		xnbp->xnb_stat_rx_too_early++;
		return (mp);
	}

	loop = xnbp->xnb_rx_ring.req_cons;
	prod = xnbp->xnb_rx_ring.rsp_prod_pvt;

	while ((mp != NULL) &&
	    XNB_RING_HAS_UNCONSUMED_REQUESTS(&xnbp->xnb_rx_ring)) {
		netif_rx_request_t *rxreq;
		size_t d_offset, len;
		int item_count;
		gnttab_copy_t *gop_cp;
		netif_rx_response_t *rxresp;
		uint16_t cksum_flags;
		int16_t status = NETIF_RSP_OKAY;

		/* 1 */
		rxreq = RING_GET_REQUEST(&xnbp->xnb_rx_ring, loop);

#ifdef XNB_DEBUG
		if (!(rxreq->id < NET_RX_RING_SIZE))
			cmn_err(CE_PANIC, "xnb_copy_to_peer: "
			    "id %d out of range in request 0x%p",
			    rxreq->id, (void *)rxreq);
#endif /* XNB_DEBUG */

		/* 2 */
		d_offset = 0;
		len = 0;
		item_count = 0;

		gop_cp = xnbp->xnb_rx_cpop;

		/*
		 * We walk the b_cont pointers and set up a
		 * gnttab_copy_t for each sub-page chunk in each data
		 * block.
		 */
		/* 2a */
		for (ml = mp, ml_prev = NULL; ml != NULL; ml = ml->b_cont) {
			size_t chunk = ml->b_wptr - ml->b_rptr;
			uchar_t *r_tmp, *rpt_align;
			size_t r_offset;

			/*
			 * The hypervisor will not allow us to
			 * reference a foreign page (e.g. one
			 * belonging to another domain) by mfn in the
			 * copy operation.  If the data in this mblk is
			 * on such a page we must copy the data into a
			 * local page before initiating the hypervisor
			 * copy operation.
			 */
			if (is_foreign(ml->b_rptr) || is_foreign(ml->b_wptr)) {
				mblk_t *ml_new = replace_msg(ml, chunk,
				    mp_prev, ml_prev);

				/* We can still use old ml, but not *ml! */
				if (free == ml)
					free = ml_new;
				if (mp == ml)
					mp = ml_new;
				ml = ml_new;

				xnbp->xnb_stat_rx_foreign_page++;
			}

			rpt_align = (uchar_t *)ALIGN2PAGE(ml->b_rptr);
			r_offset = (uint16_t)(ml->b_rptr - rpt_align);
			r_tmp = ml->b_rptr;

			if (d_offset + chunk > PAGESIZE)
				cmn_err(CE_PANIC, "xnb_copy_to_peer: mp %p "
				    "(svd: %p), ml %p, rpt_align %p, d_offset "
				    "(%lu) + chunk (%lu) > PAGESIZE %d!",
				    (void *)mp, (void *)saved_mp, (void *)ml,
				    (void *)rpt_align,
				    d_offset, chunk, (int)PAGESIZE);

			while (chunk > 0) {
				size_t part_len;

				if (item_count == xnbp->xnb_rx_cpop_count) {
					if (!grow_cpop_area(xnbp))
						goto failure;
					gop_cp = &xnbp->xnb_rx_cpop[item_count];
				}
				/*
				 * If our mblk crosses a page boundary, we need
				 * to do a separate copy for each page.
				 */
				if (r_offset + chunk > PAGESIZE) {
					part_len = PAGESIZE - r_offset;

					DTRACE_PROBE3(mblk_page_crossed,
					    (mblk_t *), ml, int, chunk, int,
					    (int)r_offset);

					xnbp->xnb_stat_rx_pagebndry_crossed++;
				} else {
					part_len = chunk;
				}

				setup_gop(xnbp, gop_cp, r_tmp, r_offset,
				    d_offset, part_len, rxreq->gref);

				chunk -= part_len;

				len += part_len;
				d_offset += part_len;
				r_tmp += part_len;
				/*
				 * The 2nd, 3rd ... last copies will always
				 * start at r_tmp, therefore r_offset is 0.
				 */
				r_offset = 0;
				gop_cp++;
				item_count++;
			}
			ml_prev = ml;

			DTRACE_PROBE4(mblk_loop_end, (mblk_t *), ml, int,
			    chunk, int, len, int, item_count);
		}
		/* 3 */
		if (HYPERVISOR_grant_table_op(GNTTABOP_copy, xnbp->xnb_rx_cpop,
		    item_count) != 0) {
			cmn_err(CE_WARN, "xnb_copy_to_peer: copy op. failed");
			DTRACE_PROBE(HV_granttableopfailed);
		}

		/* 4 */
		rxresp = RING_GET_RESPONSE(&xnbp->xnb_rx_ring, prod);
		rxresp->offset = 0;

		rxresp->flags = 0;

		DTRACE_PROBE4(got_RX_rsp, int, (int)rxresp->id, int,
		    (int)rxresp->offset, int, (int)rxresp->flags, int,
		    (int)rxresp->status);

		cksum_flags = xnbp->xnb_flavour->xf_cksum_to_peer(xnbp, mp);
		if (cksum_flags != 0)
			xnbp->xnb_stat_rx_cksum_deferred++;
		rxresp->flags |= cksum_flags;

		rxresp->id = RING_GET_REQUEST(&xnbp->xnb_rx_ring, prod)->id;
		rxresp->status = len;

		DTRACE_PROBE4(RX_rsp_set, int, (int)rxresp->id, int,
		    (int)rxresp->offset, int, (int)rxresp->flags, int,
		    (int)rxresp->status);

		for (i = 0; i < item_count; i++) {
			if (xnbp->xnb_rx_cpop[i].status != 0) {
				DTRACE_PROBE2(cpop_status_nonnull, int,
				    (int)xnbp->xnb_rx_cpop[i].status,
				    int, i);
				status = NETIF_RSP_ERROR;
			}
		}

		/* 5.2 */
		if (status != NETIF_RSP_OKAY) {
			RING_GET_RESPONSE(&xnbp->xnb_rx_ring, prod)->status =
			    status;
			xnbp->xnb_stat_rx_rsp_notok++;
		} else {
			xnbp->xnb_stat_ipackets++;
			xnbp->xnb_stat_rbytes += len;
		}

		loop++;
		prod++;
		mp_prev = mp;
		mp = mp->b_next;
	}
failure:
	/*
	 * Did we actually do anything?
	 */
	if (loop == xnbp->xnb_rx_ring.req_cons) {
		mutex_exit(&xnbp->xnb_rx_lock);
		return (mp);
	}

	/*
	 * Unlink the end of the 'done' list from the remainder.
	 */
	ASSERT(mp_prev != NULL);
	mp_prev->b_next = NULL;

	xnbp->xnb_rx_ring.req_cons = loop;
	xnbp->xnb_rx_ring.rsp_prod_pvt = prod;

	/* 6 */
	/* LINTED: constant in conditional context */
	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&xnbp->xnb_rx_ring, notify);
	if (notify) {
		ec_notify_via_evtchn(xnbp->xnb_evtchn);
		xnbp->xnb_stat_rx_notify_sent++;
	} else {
		xnbp->xnb_stat_rx_notify_deferred++;
	}

	if (mp != NULL)
		xnbp->xnb_stat_rx_defer++;

	mutex_exit(&xnbp->xnb_rx_lock);

	/* Free mblk_t structs we have consumed. */
	freemsgchain(free);

	return (mp);
}


static void
xnb_tx_notify_peer(xnb_t *xnbp, boolean_t force)
{
	boolean_t notify;

	ASSERT(MUTEX_HELD(&xnbp->xnb_tx_lock));

	/* LINTED: constant in conditional context */
	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&xnbp->xnb_tx_ring, notify);
	if (notify || force) {
		ec_notify_via_evtchn(xnbp->xnb_evtchn);
		xnbp->xnb_stat_tx_notify_sent++;
	} else {
		xnbp->xnb_stat_tx_notify_deferred++;
	}
}

static void
xnb_tx_mark_complete(xnb_t *xnbp, RING_IDX id, int16_t status)
{
	RING_IDX i;
	netif_tx_response_t *txresp;

	ASSERT(MUTEX_HELD(&xnbp->xnb_tx_lock));

	i = xnbp->xnb_tx_ring.rsp_prod_pvt;

	txresp = RING_GET_RESPONSE(&xnbp->xnb_tx_ring, i);
	txresp->id = id;
	txresp->status = status;

	xnbp->xnb_tx_ring.rsp_prod_pvt = i + 1;

	/*
	 * Note that we don't push the change to the peer here - that
	 * is the caller's responsibility.
	 */
}

static void
xnb_txbuf_recycle(xnb_txbuf_t *txp)
{
	xnb_t *xnbp = txp->xt_xnbp;

	kmem_cache_free(xnbp->xnb_tx_buf_cache, txp);

	xnbp->xnb_tx_buf_outstanding--;
}

static int
xnb_txbuf_constructor(void *buf, void *arg, int kmflag)
{
	_NOTE(ARGUNUSED(kmflag));
	xnb_txbuf_t *txp = buf;
	xnb_t *xnbp = arg;
	size_t len;
	ddi_dma_cookie_t dma_cookie;
	uint_t ncookies;

	txp->xt_free_rtn.free_func = xnb_txbuf_recycle;
	txp->xt_free_rtn.free_arg = (caddr_t)txp;
	txp->xt_xnbp = xnbp;
	txp->xt_next = NULL;

	if (ddi_dma_alloc_handle(xnbp->xnb_devinfo, &buf_dma_attr,
	    0, 0, &txp->xt_dma_handle) != DDI_SUCCESS)
		goto failure;

	if (ddi_dma_mem_alloc(txp->xt_dma_handle, PAGESIZE, &data_accattr,
	    DDI_DMA_STREAMING, 0, 0, &txp->xt_buf, &len,
	    &txp->xt_acc_handle) != DDI_SUCCESS)
		goto failure_1;

	if (ddi_dma_addr_bind_handle(txp->xt_dma_handle, NULL, txp->xt_buf,
	    len, DDI_DMA_RDWR | DDI_DMA_STREAMING, DDI_DMA_DONTWAIT, 0,
	    &dma_cookie, &ncookies)
	    != DDI_DMA_MAPPED)
		goto failure_2;
	ASSERT(ncookies == 1);

	txp->xt_mfn = xnb_btop(dma_cookie.dmac_laddress);
	txp->xt_buflen = dma_cookie.dmac_size;

	DTRACE_PROBE(txbuf_allocated);

	atomic_add_32(&xnbp->xnb_tx_buf_count, 1);
	xnbp->xnb_tx_buf_outstanding++;

	return (0);

failure_2:
	ddi_dma_mem_free(&txp->xt_acc_handle);

failure_1:
	ddi_dma_free_handle(&txp->xt_dma_handle);

failure:

	return (-1);
}

static void
xnb_txbuf_destructor(void *buf, void *arg)
{
	xnb_txbuf_t *txp = buf;
	xnb_t *xnbp = arg;

	(void) ddi_dma_unbind_handle(txp->xt_dma_handle);
	ddi_dma_mem_free(&txp->xt_acc_handle);
	ddi_dma_free_handle(&txp->xt_dma_handle);

	atomic_add_32(&xnbp->xnb_tx_buf_count, -1);
}
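
/*
 * Transmit buffer life cycle: xnb_txbuf_constructor() allocates a page of
 * DMA-able memory per buffer, xnb_from_peer() wraps that buffer in an mblk
 * via desballoc() and has the hypervisor copy the peer's data into it, and
 * xnb_txbuf_recycle() returns the buffer to the cache when the mblk is
 * eventually freed by the rest of the stack.
 */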

/*
 * Take packets from the peer and deliver them onward.
 */
static mblk_t *
xnb_from_peer(xnb_t *xnbp)
{
	RING_IDX start, end, loop;
	gnttab_copy_t *cop;
	xnb_txbuf_t **txpp;
	netif_tx_request_t *txreq;
	boolean_t work_to_do, need_notify = B_FALSE;
	mblk_t *head, *tail;
	int n_data_req, i;

	ASSERT(MUTEX_HELD(&xnbp->xnb_tx_lock));

	head = tail = NULL;
around:

	/* LINTED: constant in conditional context */
	RING_FINAL_CHECK_FOR_REQUESTS(&xnbp->xnb_tx_ring, work_to_do);
	if (!work_to_do) {
finished:
		xnb_tx_notify_peer(xnbp, need_notify);

		return (head);
	}

	start = xnbp->xnb_tx_ring.req_cons;
	end = xnbp->xnb_tx_ring.sring->req_prod;

	if ((end - start) > NET_TX_RING_SIZE) {
		/*
		 * This usually indicates that the frontend driver is
		 * misbehaving, as it's not possible to have more than
		 * NET_TX_RING_SIZE ring elements in play at any one
		 * time.
		 *
		 * We reset the ring pointers to the state declared by
		 * the frontend and try to carry on.
		 */
		cmn_err(CE_WARN, "xnb_from_peer: domain %d tried to give us %u "
		    "items in the ring, resetting and trying to recover.",
		    xnbp->xnb_peer, (end - start));

		/* LINTED: constant in conditional context */
		BACK_RING_ATTACH(&xnbp->xnb_tx_ring,
		    (netif_tx_sring_t *)xnbp->xnb_tx_ring_addr, PAGESIZE);

		goto around;
	}

	loop = start;
	cop = xnbp->xnb_tx_cop;
	txpp = xnbp->xnb_tx_bufp;
	n_data_req = 0;

	while (loop < end) {
		static const uint16_t acceptable_flags =
		    NETTXF_csum_blank |
		    NETTXF_data_validated |
		    NETTXF_extra_info;
		uint16_t unexpected_flags;

		txreq = RING_GET_REQUEST(&xnbp->xnb_tx_ring, loop);

		unexpected_flags = txreq->flags & ~acceptable_flags;
		if (unexpected_flags != 0) {
			/*
			 * The peer used flag bits that we do not
			 * recognize.
			 */
			cmn_err(CE_WARN, "xnb_from_peer: "
			    "unexpected flag bits (0x%x) from peer "
			    "in transmit request",
			    unexpected_flags);
			xnbp->xnb_stat_tx_unexpected_flags++;

			/* Mark this entry as failed. */
			xnb_tx_mark_complete(xnbp, txreq->id, NETIF_RSP_ERROR);
			need_notify = B_TRUE;

		} else if (txreq->flags & NETTXF_extra_info) {
			struct netif_extra_info *erp;
			boolean_t status;

			loop++; /* Consume another slot in the ring. */
			ASSERT(loop <= end);

			erp = (struct netif_extra_info *)
			    RING_GET_REQUEST(&xnbp->xnb_tx_ring, loop);

			switch (erp->type) {
			case XEN_NETIF_EXTRA_TYPE_MCAST_ADD:
				ASSERT(xnbp->xnb_multicast_control);
				status = xnbp->xnb_flavour->xf_mcast_add(xnbp,
				    &erp->u.mcast.addr);
				break;
			case XEN_NETIF_EXTRA_TYPE_MCAST_DEL:
				ASSERT(xnbp->xnb_multicast_control);
				status = xnbp->xnb_flavour->xf_mcast_del(xnbp,
				    &erp->u.mcast.addr);
				break;
			default:
				status = B_FALSE;
				cmn_err(CE_WARN, "xnb_from_peer: "
				    "unknown extra type %d", erp->type);
				break;
			}

			xnb_tx_mark_complete(xnbp, txreq->id,
			    status ? NETIF_RSP_OKAY : NETIF_RSP_ERROR);
			need_notify = B_TRUE;

		} else if ((txreq->offset > PAGESIZE) ||
		    (txreq->offset + txreq->size > PAGESIZE)) {
			/*
			 * Peer attempted to refer to data beyond the
			 * end of the granted page.
			 */
			cmn_err(CE_WARN, "xnb_from_peer: "
			    "attempt to refer beyond the end of granted "
			    "page in txreq (offset %d, size %d).",
			    txreq->offset, txreq->size);
			xnbp->xnb_stat_tx_overflow_page++;

			/* Mark this entry as failed. */
			xnb_tx_mark_complete(xnbp, txreq->id, NETIF_RSP_ERROR);
			need_notify = B_TRUE;

		} else {
			xnb_txbuf_t *txp;

			txp = kmem_cache_alloc(xnbp->xnb_tx_buf_cache,
			    KM_NOSLEEP);
			if (txp == NULL)
				break;

			txp->xt_mblk = desballoc((unsigned char *)txp->xt_buf,
			    txp->xt_buflen, 0, &txp->xt_free_rtn);
			if (txp->xt_mblk == NULL) {
				kmem_cache_free(xnbp->xnb_tx_buf_cache, txp);
				break;
			}

			txp->xt_idx = loop;
			txp->xt_id = txreq->id;

			cop->source.u.ref = txreq->gref;
			cop->source.domid = xnbp->xnb_peer;
			cop->source.offset = txreq->offset;

			cop->dest.u.gmfn = txp->xt_mfn;
			cop->dest.domid = DOMID_SELF;
			cop->dest.offset = 0;

			cop->len = txreq->size;
			cop->flags = GNTCOPY_source_gref;
			cop->status = 0;

			*txpp = txp;

			txpp++;
			cop++;
			n_data_req++;

			ASSERT(n_data_req <= NET_TX_RING_SIZE);
		}

		loop++;
	}

	xnbp->xnb_tx_ring.req_cons = loop;

	if (n_data_req == 0)
		goto around;

	if (HYPERVISOR_grant_table_op(GNTTABOP_copy,
	    xnbp->xnb_tx_cop, n_data_req) != 0) {

		cmn_err(CE_WARN, "xnb_from_peer: copy operation failed");

		txpp = xnbp->xnb_tx_bufp;
		i = n_data_req;
		while (i > 0) {
			kmem_cache_free(xnbp->xnb_tx_buf_cache, *txpp);
			txpp++;
			i--;
		}

		goto finished;
	}

	txpp = xnbp->xnb_tx_bufp;
	cop = xnbp->xnb_tx_cop;
	i = n_data_req;

	while (i > 0) {
		xnb_txbuf_t *txp = *txpp;

		txreq = RING_GET_REQUEST(&xnbp->xnb_tx_ring, txp->xt_idx);

		if (cop->status != 0) {
#ifdef XNB_DEBUG
			cmn_err(CE_WARN, "xnb_from_peer: "
			    "txpp 0x%p failed (%d)",
			    (void *)*txpp, cop->status);
#endif /* XNB_DEBUG */
			xnb_tx_mark_complete(xnbp, txp->xt_id, NETIF_RSP_ERROR);
			freemsg(txp->xt_mblk);
		} else {
			mblk_t *mp;

			mp = txp->xt_mblk;
			mp->b_rptr = mp->b_wptr = (unsigned char *)txp->xt_buf;
			mp->b_wptr += txreq->size;
			mp->b_next = NULL;

			/*
			 * If there are checksum flags, process them
			 * appropriately.
			 */
			if ((txreq->flags &
			    (NETTXF_csum_blank | NETTXF_data_validated))
			    != 0) {
				mp = xnbp->xnb_flavour->xf_cksum_from_peer(xnbp,
				    mp, txreq->flags);
				xnbp->xnb_stat_tx_cksum_no_need++;

				txp->xt_mblk = mp;
			}

			if (head == NULL) {
				ASSERT(tail == NULL);
				head = mp;
			} else {
				ASSERT(tail != NULL);
				tail->b_next = mp;
			}
			tail = mp;

			xnbp->xnb_stat_opackets++;
			xnbp->xnb_stat_obytes += txreq->size;

			xnb_tx_mark_complete(xnbp, txp->xt_id, NETIF_RSP_OKAY);
		}

		txpp++;
		cop++;
		i--;
	}

	goto around;
	/* NOTREACHED */
}

/*
 * Interrupt handler for the event channel: pull packets from the peer
 * and hand them to the flavour-specific delivery routine.
 */
static uint_t
xnb_intr(caddr_t arg)
{
	xnb_t *xnbp = (xnb_t *)arg;
	mblk_t *mp;

	xnbp->xnb_stat_intr++;

	mutex_enter(&xnbp->xnb_tx_lock);

	ASSERT(xnbp->xnb_connected);

	mp = xnb_from_peer(xnbp);

	mutex_exit(&xnbp->xnb_tx_lock);

	if (!xnbp->xnb_hotplugged) {
		xnbp->xnb_stat_tx_too_early++;
		goto fail;
	}
	if (mp == NULL) {
		xnbp->xnb_stat_spurious_intr++;
		goto fail;
	}

	xnbp->xnb_flavour->xf_from_peer(xnbp, mp);

	return (DDI_INTR_CLAIMED);

fail:
	freemsgchain(mp);
	return (DDI_INTR_CLAIMED);
}

/*
 * Read our configuration from xenstore.
 */
boolean_t
xnb_read_xs_config(xnb_t *xnbp)
{
	char *xsname;
	char mac[ETHERADDRL * 3];

	xsname = xvdi_get_xsname(xnbp->xnb_devinfo);

	if (xenbus_scanf(XBT_NULL, xsname,
	    "mac", "%s", mac) != 0) {
		cmn_err(CE_WARN, "xnb_read_xs_config: "
		    "cannot read mac address from %s",
		    xsname);
		return (B_FALSE);
	}

	if (ether_aton(mac, xnbp->xnb_mac_addr) != ETHERADDRL) {
		cmn_err(CE_WARN,
		    "xnb_read_xs_config: cannot parse mac address %s",
		    mac);
		return (B_FALSE);
	}

	return (B_TRUE);
}

/*
 * Read the configuration of the peer from xenstore.
 */
boolean_t
xnb_read_oe_config(xnb_t *xnbp)
{
	char *oename;
	int i;

	oename = xvdi_get_oename(xnbp->xnb_devinfo);

	if (xenbus_gather(XBT_NULL, oename,
	    "event-channel", "%u", &xnbp->xnb_fe_evtchn,
	    "tx-ring-ref", "%lu", &xnbp->xnb_tx_ring_ref,
	    "rx-ring-ref", "%lu", &xnbp->xnb_rx_ring_ref,
	    NULL) != 0) {
		cmn_err(CE_WARN, "xnb_read_oe_config: "
		    "cannot read other-end details from %s",
		    oename);
		return (B_FALSE);
	}

	/*
	 * Check whether our peer requests receive side hypervisor
	 * copy.
	 */
	if (xenbus_scanf(XBT_NULL, oename,
	    "request-rx-copy", "%d", &i) != 0)
		i = 0;
	if (i != 0)
		xnbp->xnb_rx_hv_copy = B_TRUE;

	/*
	 * Check whether our peer requests multicast control.
	 */
	if (xenbus_scanf(XBT_NULL, oename,
	    "request-multicast-control", "%d", &i) != 0)
		i = 0;
	if (i != 0)
		xnbp->xnb_multicast_control = B_TRUE;

	/*
	 * The Linux backend driver here checks to see if the peer has
	 * set 'feature-no-csum-offload'.  This is used to indicate
	 * that the guest cannot handle receiving packets without a
	 * valid checksum.  We don't check here, because packets passed
	 * to the peer _always_ have a valid checksum.
	 *
	 * There are three cases:
	 *
	 * - the NIC is dedicated: packets from the wire should always
	 *   have a valid checksum.  If the hardware validates the
	 *   checksum then the relevant bit will be set in the packet
	 *   attributes and we will inform the peer.  It can choose to
	 *   ignore the hardware verification.
	 *
	 * - the NIC is shared (VNIC) and a packet originates from the
	 *   wire: this is the same as the case above - the packets
	 *   will have a valid checksum.
	 *
	 * - the NIC is shared (VNIC) and a packet originates from the
	 *   host: the MAC layer ensures that all such packets have a
	 *   valid checksum by calculating one if the stack did not.
	 */

	return (B_TRUE);
}

void
xnb_start_connect(xnb_t *xnbp)
{
	dev_info_t *dip = xnbp->xnb_devinfo;

	if (!xnb_connect_rings(dip)) {
		cmn_err(CE_WARN, "xnb_start_connect: "
		    "cannot connect rings");
		goto failed;
	}

	if (!xnbp->xnb_flavour->xf_start_connect(xnbp)) {
		cmn_err(CE_WARN, "xnb_start_connect: "
		    "flavour failed to connect");
		goto failed;
	}

	(void) xvdi_switch_state(dip, XBT_NULL, XenbusStateConnected);
	return;

failed:
	xnbp->xnb_flavour->xf_peer_disconnected(xnbp);
	xnb_disconnect_rings(dip);
	(void) xvdi_switch_state(dip, XBT_NULL,
	    XenbusStateClosed);
	(void) xvdi_post_event(dip, XEN_HP_REMOVE);
}

static boolean_t
xnb_connect_rings(dev_info_t *dip)
{
	xnb_t *xnbp = ddi_get_driver_private(dip);
	struct gnttab_map_grant_ref map_op;

	/*
	 * Cannot attempt to connect the rings if already connected.
	 */
	ASSERT(!xnbp->xnb_connected);

	/*
	 * 1. allocate a vaddr for the tx page, one for the rx page.
	 * 2. call GNTTABOP_map_grant_ref to map the relevant pages
	 *    into the allocated vaddr (one for tx, one for rx).
	 * 3. call EVTCHNOP_bind_interdomain to have the event channel
	 *    bound to this domain.
	 * 4. associate the event channel with an interrupt.
	 * 5. enable the interrupt.
	 */

	/* 1.tx */
	xnbp->xnb_tx_ring_addr = vmem_xalloc(heap_arena, PAGESIZE, PAGESIZE,
	    0, 0, 0, 0, VM_SLEEP);
	ASSERT(xnbp->xnb_tx_ring_addr != NULL);

	/* 2.tx */
	map_op.host_addr = (uint64_t)((long)xnbp->xnb_tx_ring_addr);
	map_op.flags = GNTMAP_host_map;
	map_op.ref = xnbp->xnb_tx_ring_ref;
	map_op.dom = xnbp->xnb_peer;
	hat_prepare_mapping(kas.a_hat, xnbp->xnb_tx_ring_addr, NULL);
	if (xen_map_gref(GNTTABOP_map_grant_ref, &map_op, 1, B_FALSE) != 0 ||
	    map_op.status != 0) {
		cmn_err(CE_WARN, "xnb_connect_rings: cannot map tx-ring page.");
		goto fail;
	}
	xnbp->xnb_tx_ring_handle = map_op.handle;

	/* LINTED: constant in conditional context */
	BACK_RING_INIT(&xnbp->xnb_tx_ring,
	    (netif_tx_sring_t *)xnbp->xnb_tx_ring_addr, PAGESIZE);

	/* 1.rx */
	xnbp->xnb_rx_ring_addr = vmem_xalloc(heap_arena, PAGESIZE, PAGESIZE,
	    0, 0, 0, 0, VM_SLEEP);
	ASSERT(xnbp->xnb_rx_ring_addr != NULL);

	/* 2.rx */
	map_op.host_addr = (uint64_t)((long)xnbp->xnb_rx_ring_addr);
	map_op.flags = GNTMAP_host_map;
	map_op.ref = xnbp->xnb_rx_ring_ref;
	map_op.dom = xnbp->xnb_peer;
	hat_prepare_mapping(kas.a_hat, xnbp->xnb_rx_ring_addr, NULL);
	if (xen_map_gref(GNTTABOP_map_grant_ref, &map_op, 1, B_FALSE) != 0 ||
	    map_op.status != 0) {
		cmn_err(CE_WARN, "xnb_connect_rings: cannot map rx-ring page.");
		goto fail;
	}
	xnbp->xnb_rx_ring_handle = map_op.handle;

	/* LINTED: constant in conditional context */
	BACK_RING_INIT(&xnbp->xnb_rx_ring,
	    (netif_rx_sring_t *)xnbp->xnb_rx_ring_addr, PAGESIZE);

	/* 3 */
	if (xvdi_bind_evtchn(dip, xnbp->xnb_fe_evtchn) != DDI_SUCCESS) {
		cmn_err(CE_WARN, "xnb_connect_rings: "
		    "cannot bind event channel %d", xnbp->xnb_fe_evtchn);
		xnbp->xnb_evtchn = INVALID_EVTCHN;
		goto fail;
	}
	xnbp->xnb_evtchn = xvdi_get_evtchn(dip);

	/*
	 * It would be good to set the state to XenbusStateConnected
	 * here as well, but then what if ddi_add_intr() failed?
	 * Changing the state in the store will be noticed by the peer
	 * and cannot be "taken back".
	 */
	mutex_enter(&xnbp->xnb_tx_lock);
	mutex_enter(&xnbp->xnb_rx_lock);

	xnbp->xnb_connected = B_TRUE;

	mutex_exit(&xnbp->xnb_rx_lock);
	mutex_exit(&xnbp->xnb_tx_lock);

	/* 4, 5 */
	if (ddi_add_intr(dip, 0, NULL, NULL, xnb_intr, (caddr_t)xnbp)
	    != DDI_SUCCESS) {
		cmn_err(CE_WARN, "xnb_connect_rings: cannot add interrupt");
		goto fail;
	}
	xnbp->xnb_irq = B_TRUE;

	return (B_TRUE);

fail:
	mutex_enter(&xnbp->xnb_tx_lock);
	mutex_enter(&xnbp->xnb_rx_lock);

	xnbp->xnb_connected = B_FALSE;

	mutex_exit(&xnbp->xnb_rx_lock);
	mutex_exit(&xnbp->xnb_tx_lock);

	return (B_FALSE);
}

static void
xnb_disconnect_rings(dev_info_t *dip)
{
	xnb_t *xnbp = ddi_get_driver_private(dip);

	if (xnbp->xnb_irq) {
		ddi_remove_intr(dip, 0, NULL);
		xnbp->xnb_irq = B_FALSE;
	}

	if (xnbp->xnb_evtchn != INVALID_EVTCHN) {
		xvdi_free_evtchn(dip);
		xnbp->xnb_evtchn = INVALID_EVTCHN;
	}

	if (xnbp->xnb_rx_ring_handle != INVALID_GRANT_HANDLE) {
		struct gnttab_unmap_grant_ref unmap_op;

		unmap_op.host_addr = (uint64_t)(uintptr_t)
		    xnbp->xnb_rx_ring_addr;
		unmap_op.dev_bus_addr = 0;
		unmap_op.handle = xnbp->xnb_rx_ring_handle;
		if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
		    &unmap_op, 1) != 0)
			cmn_err(CE_WARN, "xnb_disconnect_rings: "
			    "cannot unmap rx-ring page (%d)",
			    unmap_op.status);

		xnbp->xnb_rx_ring_handle = INVALID_GRANT_HANDLE;
	}

	if (xnbp->xnb_rx_ring_addr != NULL) {
		hat_release_mapping(kas.a_hat, xnbp->xnb_rx_ring_addr);
		vmem_free(heap_arena, xnbp->xnb_rx_ring_addr, PAGESIZE);
		xnbp->xnb_rx_ring_addr = NULL;
	}

	if (xnbp->xnb_tx_ring_handle != INVALID_GRANT_HANDLE) {
		struct gnttab_unmap_grant_ref unmap_op;

		unmap_op.host_addr = (uint64_t)(uintptr_t)
		    xnbp->xnb_tx_ring_addr;
		unmap_op.dev_bus_addr = 0;
		unmap_op.handle = xnbp->xnb_tx_ring_handle;
		if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
		    &unmap_op, 1) != 0)
			cmn_err(CE_WARN, "xnb_disconnect_rings: "
			    "cannot unmap tx-ring page (%d)",
			    unmap_op.status);

		xnbp->xnb_tx_ring_handle = INVALID_GRANT_HANDLE;
	}

	if (xnbp->xnb_tx_ring_addr != NULL) {
		hat_release_mapping(kas.a_hat, xnbp->xnb_tx_ring_addr);
		vmem_free(heap_arena, xnbp->xnb_tx_ring_addr, PAGESIZE);
		xnbp->xnb_tx_ring_addr = NULL;
	}
}

static void
xnb_oe_state_change(dev_info_t *dip, ddi_eventcookie_t id,
    void *arg, void *impl_data)
{
	_NOTE(ARGUNUSED(id, arg));
	xnb_t *xnbp = ddi_get_driver_private(dip);
	XenbusState new_state = *(XenbusState *)impl_data;

	ASSERT(xnbp != NULL);

	switch (new_state) {
	case XenbusStateConnected:
		/* spurious state change */
		if (xnbp->xnb_connected)
			return;

		if (!xnb_read_oe_config(xnbp) ||
		    !xnbp->xnb_flavour->xf_peer_connected(xnbp)) {
			cmn_err(CE_WARN, "xnb_oe_state_change: "
			    "read otherend config error");
			(void) xvdi_switch_state(dip, XBT_NULL,
			    XenbusStateClosed);
			(void) xvdi_post_event(dip, XEN_HP_REMOVE);

			break;
		}

		mutex_enter(&xnbp->xnb_state_lock);
		xnbp->xnb_fe_status = XNB_STATE_READY;
		if (xnbp->xnb_be_status == XNB_STATE_READY)
			xnb_start_connect(xnbp);
		mutex_exit(&xnbp->xnb_state_lock);

		/*
		 * Now that we've attempted to connect it's reasonable
		 * to allow an attempt to detach.
		 */
		xnbp->xnb_detachable = B_TRUE;

		break;

	case XenbusStateClosing:
		(void) xvdi_switch_state(dip, XBT_NULL, XenbusStateClosing);

		break;

	case XenbusStateClosed:
		xnbp->xnb_flavour->xf_peer_disconnected(xnbp);

		mutex_enter(&xnbp->xnb_tx_lock);
		mutex_enter(&xnbp->xnb_rx_lock);

		xnb_disconnect_rings(dip);
		xnbp->xnb_connected = B_FALSE;

		mutex_exit(&xnbp->xnb_rx_lock);
		mutex_exit(&xnbp->xnb_tx_lock);

		(void) xvdi_switch_state(dip, XBT_NULL, XenbusStateClosed);
		(void) xvdi_post_event(dip, XEN_HP_REMOVE);
		/*
		 * In all likelihood this is already set (in the above
		 * case), but if the peer never attempted to connect
		 * and the domain is destroyed we get here without
		 * having been through the case above, so we set it to
		 * be sure.
		 */
		xnbp->xnb_detachable = B_TRUE;

		break;

	default:
		break;
	}
}

static void
xnb_hp_state_change(dev_info_t *dip, ddi_eventcookie_t id,
    void *arg, void *impl_data)
{
	_NOTE(ARGUNUSED(id, arg));
	xnb_t *xnbp = ddi_get_driver_private(dip);
	xendev_hotplug_state_t state = *(xendev_hotplug_state_t *)impl_data;

	ASSERT(xnbp != NULL);

	switch (state) {
	case Connected:
		/* spurious hotplug event */
		if (xnbp->xnb_hotplugged)
			break;

		if (!xnb_read_xs_config(xnbp))
			break;

		if (!xnbp->xnb_flavour->xf_hotplug_connected(xnbp))
			break;

		mutex_enter(&xnbp->xnb_tx_lock);
		mutex_enter(&xnbp->xnb_rx_lock);

		xnbp->xnb_hotplugged = B_TRUE;

		mutex_exit(&xnbp->xnb_rx_lock);
		mutex_exit(&xnbp->xnb_tx_lock);

		mutex_enter(&xnbp->xnb_state_lock);
		xnbp->xnb_be_status = XNB_STATE_READY;
		if (xnbp->xnb_fe_status == XNB_STATE_READY)
			xnb_start_connect(xnbp);
		mutex_exit(&xnbp->xnb_state_lock);

		break;

	default:
		break;
	}
}

static struct modldrv modldrv = {
	&mod_miscops, "xnb",
};

static struct modlinkage modlinkage = {
	MODREV_1, &modldrv, NULL
};

int
_init(void)
{
	int i;

	mutex_init(&xnb_alloc_page_lock, NULL, MUTEX_DRIVER, NULL);

	i = mod_install(&modlinkage);
	if (i != DDI_SUCCESS)
		mutex_destroy(&xnb_alloc_page_lock);

	return (i);
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}

int
_fini(void)
{
	int i;

	i = mod_remove(&modlinkage);
	if (i == DDI_SUCCESS)
		mutex_destroy(&xnb_alloc_page_lock);

	return (i);
}