/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#ifdef DEBUG
#define	XNB_DEBUG 1
#endif /* DEBUG */

#include "xnb.h"

#include <sys/sunddi.h>
#include <sys/sunndi.h>
#include <sys/modctl.h>
#include <sys/conf.h>
#include <sys/mac.h>
#include <sys/mac_impl.h> /* For mac_fix_cksum(). */
#include <sys/dlpi.h>
#include <sys/strsubr.h>
#include <sys/strsun.h>
#include <sys/types.h>
#include <sys/pattr.h>
#include <vm/seg_kmem.h>
#include <vm/hat_i86.h>
#include <xen/sys/xenbus_impl.h>
#include <xen/sys/xendev.h>
#include <sys/balloon_impl.h>
#include <sys/evtchn_impl.h>
#include <sys/gnttab.h>
#include <vm/vm_dep.h>
#include <sys/note.h>
#include <sys/gld.h>
#include <inet/ip.h>
#include <inet/ip_impl.h>

/*
 * The terms "transmit" and "receive" are used in alignment with domU,
 * which means that packets originating from the peer domU are "transmitted"
 * to other parts of the system and packets are "received" from them.
 */

/*
 * Should we allow guests to manipulate multicast group membership?
 */
static boolean_t	xnb_multicast_control = B_TRUE;

static boolean_t	xnb_connect_rings(dev_info_t *);
static void	xnb_disconnect_rings(dev_info_t *);
static void	xnb_oe_state_change(dev_info_t *, ddi_eventcookie_t,
    void *, void *);
static void	xnb_hp_state_change(dev_info_t *, ddi_eventcookie_t,
    void *, void *);

static int	xnb_txbuf_constructor(void *, void *, int);
static void	xnb_txbuf_destructor(void *, void *);
static void	xnb_tx_notify_peer(xnb_t *, boolean_t);
static void	xnb_tx_mark_complete(xnb_t *, RING_IDX, int16_t);

mblk_t *xnb_to_peer(xnb_t *, mblk_t *);
mblk_t *xnb_copy_to_peer(xnb_t *, mblk_t *);

static void	setup_gop(xnb_t *, gnttab_copy_t *, uchar_t *,
    size_t, size_t, size_t, grant_ref_t);
#pragma inline(setup_gop)
static boolean_t	is_foreign(void *);
#pragma inline(is_foreign)

#define	INVALID_GRANT_HANDLE	((grant_handle_t)-1)
#define	INVALID_GRANT_REF	((grant_ref_t)-1)

static kmutex_t	xnb_alloc_page_lock;

/*
 * On a 32 bit PAE system physical and machine addresses are larger
 * than 32 bits.  ddi_btop() on such systems takes an unsigned long
 * argument, and so addresses above 4G are truncated before ddi_btop()
 * gets to see them.  To avoid this, code the shift operation here.
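 *
 * For example, an address above 4G such as 0x123456000 must become
 * mfn 0x123456; truncating it to 32 bits first (0x23456000) would
 * instead yield 0x23456.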
 */
#define	xnb_btop(addr)	((addr) >> PAGESHIFT)

/* DMA attributes for transmit and receive data */
static ddi_dma_attr_t buf_dma_attr = {
	DMA_ATTR_V0,		/* version of this structure */
	0,			/* lowest usable address */
	0xffffffffffffffffULL,	/* highest usable address */
	0x7fffffff,		/* maximum DMAable byte count */
	MMU_PAGESIZE,		/* alignment in bytes */
	0x7ff,			/* bitmap of burst sizes */
	1,			/* minimum transfer */
	0xffffffffU,		/* maximum transfer */
	0xffffffffffffffffULL,	/* maximum segment length */
	1,			/* maximum number of segments */
	1,			/* granularity */
	0,			/* flags (reserved) */
};

/* DMA access attributes for data: NOT to be byte swapped. */
static ddi_device_acc_attr_t data_accattr = {
	DDI_DEVICE_ATTR_V0,
	DDI_NEVERSWAP_ACC,
	DDI_STRICTORDER_ACC
};

/*
 * Statistics.
 */
static const char * const aux_statistics[] = {
	"rx_cksum_deferred",
	"tx_cksum_no_need",
	"rx_rsp_notok",
	"tx_notify_deferred",
	"tx_notify_sent",
	"rx_notify_deferred",
	"rx_notify_sent",
	"tx_too_early",
	"rx_too_early",
	"rx_allocb_failed",
	"tx_allocb_failed",
	"rx_foreign_page",
	"mac_full",
	"spurious_intr",
	"allocation_success",
	"allocation_failure",
	"small_allocation_success",
	"small_allocation_failure",
	"other_allocation_failure",
	"rx_pageboundary_crossed",
	"rx_cpoparea_grown",
	"csum_hardware",
	"csum_software",
	"tx_overflow_page",
	"tx_unexpected_flags",
};

static int
xnb_ks_aux_update(kstat_t *ksp, int flag)
{
	xnb_t *xnbp;
	kstat_named_t *knp;

	if (flag != KSTAT_READ)
		return (EACCES);

	xnbp = ksp->ks_private;
	knp = ksp->ks_data;

	/*
	 * Assignment order should match that of the names in
	 * aux_statistics.
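	 *
	 * xnb_ks_init() creates one KSTAT_DATA_UINT64 entry per name
	 * in that array, so adding a statistic means updating both
	 * the array and the list of assignments below.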
	 */
	(knp++)->value.ui64 = xnbp->xnb_stat_rx_cksum_deferred;
	(knp++)->value.ui64 = xnbp->xnb_stat_tx_cksum_no_need;
	(knp++)->value.ui64 = xnbp->xnb_stat_rx_rsp_notok;
	(knp++)->value.ui64 = xnbp->xnb_stat_tx_notify_deferred;
	(knp++)->value.ui64 = xnbp->xnb_stat_tx_notify_sent;
	(knp++)->value.ui64 = xnbp->xnb_stat_rx_notify_deferred;
	(knp++)->value.ui64 = xnbp->xnb_stat_rx_notify_sent;
	(knp++)->value.ui64 = xnbp->xnb_stat_tx_too_early;
	(knp++)->value.ui64 = xnbp->xnb_stat_rx_too_early;
	(knp++)->value.ui64 = xnbp->xnb_stat_rx_allocb_failed;
	(knp++)->value.ui64 = xnbp->xnb_stat_tx_allocb_failed;
	(knp++)->value.ui64 = xnbp->xnb_stat_rx_foreign_page;
	(knp++)->value.ui64 = xnbp->xnb_stat_mac_full;
	(knp++)->value.ui64 = xnbp->xnb_stat_spurious_intr;
	(knp++)->value.ui64 = xnbp->xnb_stat_allocation_success;
	(knp++)->value.ui64 = xnbp->xnb_stat_allocation_failure;
	(knp++)->value.ui64 = xnbp->xnb_stat_small_allocation_success;
	(knp++)->value.ui64 = xnbp->xnb_stat_small_allocation_failure;
	(knp++)->value.ui64 = xnbp->xnb_stat_other_allocation_failure;
	(knp++)->value.ui64 = xnbp->xnb_stat_rx_pagebndry_crossed;
	(knp++)->value.ui64 = xnbp->xnb_stat_rx_cpoparea_grown;
	(knp++)->value.ui64 = xnbp->xnb_stat_csum_hardware;
	(knp++)->value.ui64 = xnbp->xnb_stat_csum_software;
	(knp++)->value.ui64 = xnbp->xnb_stat_tx_overflow_page;
	(knp++)->value.ui64 = xnbp->xnb_stat_tx_unexpected_flags;

	return (0);
}

static boolean_t
xnb_ks_init(xnb_t *xnbp)
{
	int nstat = sizeof (aux_statistics) /
	    sizeof (aux_statistics[0]);
	const char * const *cp = aux_statistics;
	kstat_named_t *knp;

	/*
	 * Create and initialise kstats.
	 */
	xnbp->xnb_kstat_aux = kstat_create(ddi_driver_name(xnbp->xnb_devinfo),
	    ddi_get_instance(xnbp->xnb_devinfo), "aux_statistics", "net",
	    KSTAT_TYPE_NAMED, nstat, 0);
	if (xnbp->xnb_kstat_aux == NULL)
		return (B_FALSE);

	xnbp->xnb_kstat_aux->ks_private = xnbp;
	xnbp->xnb_kstat_aux->ks_update = xnb_ks_aux_update;

	knp = xnbp->xnb_kstat_aux->ks_data;
	while (nstat > 0) {
		kstat_named_init(knp, *cp, KSTAT_DATA_UINT64);

		knp++;
		cp++;
		nstat--;
	}

	kstat_install(xnbp->xnb_kstat_aux);

	return (B_TRUE);
}

static void
xnb_ks_free(xnb_t *xnbp)
{
	kstat_delete(xnbp->xnb_kstat_aux);
}

/*
 * Calculate and insert the transport checksum for an arbitrary packet.
 */
static mblk_t *
xnb_software_csum(xnb_t *xnbp, mblk_t *mp)
{
	_NOTE(ARGUNUSED(xnbp));

	/*
	 * XXPV dme: shouldn't rely on mac_fix_cksum(), not least
	 * because it doesn't cover all of the interesting cases :-(
	 */
	mac_hcksum_set(mp, 0, 0, 0, 0, HCK_FULLCKSUM);

	return (mac_fix_cksum(mp));
}

mblk_t *
xnb_process_cksum_flags(xnb_t *xnbp, mblk_t *mp, uint32_t capab)
{
	struct ether_header *ehp;
	uint16_t sap;
	uint32_t offset;
	ipha_t *ipha;

	ASSERT(mp->b_next == NULL);

	/*
	 * Check that the packet is contained in a single mblk.  In
	 * the "from peer" path this is true today, but may change
	 * when scatter gather support is added.  In the "to peer"
	 * path we cannot be sure, but in most cases it will be true
	 * (in the xnbo case the packet has come from a MAC device
	 * which is unlikely to split packets).
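	 *
	 * The header parsing below relies on this: the Ethernet
	 * header, any VLAN tag and the IPv4 header are all expected
	 * to be in this one data block.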
	 */
	if (mp->b_cont != NULL)
		goto software;

	/*
	 * If the MAC has no hardware capability don't do any further
	 * checking.
	 */
	if (capab == 0)
		goto software;

	ASSERT(MBLKL(mp) >= sizeof (struct ether_header));
	ehp = (struct ether_header *)mp->b_rptr;

	if (ntohs(ehp->ether_type) == VLAN_TPID) {
		struct ether_vlan_header *evhp;

		ASSERT(MBLKL(mp) >= sizeof (struct ether_vlan_header));
		evhp = (struct ether_vlan_header *)mp->b_rptr;
		sap = ntohs(evhp->ether_type);
		offset = sizeof (struct ether_vlan_header);
	} else {
		sap = ntohs(ehp->ether_type);
		offset = sizeof (struct ether_header);
	}

	/*
	 * We only attempt to do IPv4 packets in hardware.
	 */
	if (sap != ETHERTYPE_IP)
		goto software;

	/*
	 * We know that this is an IPv4 packet.
	 */
	ipha = (ipha_t *)(mp->b_rptr + offset);

	switch (ipha->ipha_protocol) {
	case IPPROTO_TCP:
	case IPPROTO_UDP: {
		uint32_t start, length, stuff, cksum;
		uint16_t *stuffp;

		/*
		 * This is a TCP/IPv4 or UDP/IPv4 packet, for which we
		 * can use full IPv4 and partial checksum offload.
		 */
		if ((capab & (HCKSUM_INET_FULL_V4|HCKSUM_INET_PARTIAL)) == 0)
			break;

		start = IP_SIMPLE_HDR_LENGTH;
		length = ntohs(ipha->ipha_length);
		if (ipha->ipha_protocol == IPPROTO_TCP) {
			stuff = start + TCP_CHECKSUM_OFFSET;
			cksum = IP_TCP_CSUM_COMP;
		} else {
			stuff = start + UDP_CHECKSUM_OFFSET;
			cksum = IP_UDP_CSUM_COMP;
		}
		stuffp = (uint16_t *)(mp->b_rptr + offset + stuff);

		if (capab & HCKSUM_INET_FULL_V4) {
			/*
			 * Some devices require that the checksum
			 * field of the packet is zero for full
			 * offload.
			 */
			*stuffp = 0;

			mac_hcksum_set(mp, 0, 0, 0, 0, HCK_FULLCKSUM);

			xnbp->xnb_stat_csum_hardware++;

			return (mp);
		}

		if (capab & HCKSUM_INET_PARTIAL) {
			if (*stuffp == 0) {
				ipaddr_t src, dst;

				/*
				 * Older Solaris guests don't insert
				 * the pseudo-header checksum, so we
				 * calculate it here.
				 */
				src = ipha->ipha_src;
				dst = ipha->ipha_dst;

				cksum += (dst >> 16) + (dst & 0xFFFF);
				cksum += (src >> 16) + (src & 0xFFFF);
				cksum += length - IP_SIMPLE_HDR_LENGTH;

				cksum = (cksum >> 16) + (cksum & 0xFFFF);
				cksum = (cksum >> 16) + (cksum & 0xFFFF);

				ASSERT(cksum <= 0xFFFF);

				*stuffp = (uint16_t)(cksum ? cksum : ~cksum);
			}

			mac_hcksum_set(mp, start, stuff, length, 0,
			    HCK_PARTIALCKSUM);

			xnbp->xnb_stat_csum_hardware++;

			return (mp);
		}

		/* NOTREACHED */
		break;
	}

	default:
		/* Use software. */
		break;
	}

software:
	/*
	 * We are not able to use any offload so do the whole thing in
	 * software.
	 */
	xnbp->xnb_stat_csum_software++;

	return (xnb_software_csum(xnbp, mp));
}

int
xnb_attach(dev_info_t *dip, xnb_flavour_t *flavour, void *flavour_data)
{
	xnb_t *xnbp;
	char *xsname;
	char cachename[32];

	xnbp = kmem_zalloc(sizeof (*xnbp), KM_SLEEP);

	xnbp->xnb_flavour = flavour;
	xnbp->xnb_flavour_data = flavour_data;
	xnbp->xnb_devinfo = dip;
	xnbp->xnb_evtchn = INVALID_EVTCHN;
	xnbp->xnb_irq = B_FALSE;
	xnbp->xnb_tx_ring_handle = INVALID_GRANT_HANDLE;
	xnbp->xnb_rx_ring_handle = INVALID_GRANT_HANDLE;
	xnbp->xnb_connected = B_FALSE;
	xnbp->xnb_hotplugged = B_FALSE;
	xnbp->xnb_detachable = B_FALSE;
	xnbp->xnb_peer = xvdi_get_oeid(dip);
	xnbp->xnb_be_status = XNB_STATE_INIT;
	xnbp->xnb_fe_status = XNB_STATE_INIT;

	xnbp->xnb_tx_buf_count = 0;

	xnbp->xnb_rx_hv_copy = B_FALSE;
	xnbp->xnb_multicast_control = B_FALSE;

	xnbp->xnb_rx_va = vmem_alloc(heap_arena, PAGESIZE, VM_SLEEP);
	ASSERT(xnbp->xnb_rx_va != NULL);

	if (ddi_get_iblock_cookie(dip, 0, &xnbp->xnb_icookie)
	    != DDI_SUCCESS)
		goto failure;

	/* Allocated on demand, when/if we enter xnb_copy_to_peer(). */
	xnbp->xnb_rx_cpop = NULL;
	xnbp->xnb_rx_cpop_count = 0;

	mutex_init(&xnbp->xnb_tx_lock, NULL, MUTEX_DRIVER,
	    xnbp->xnb_icookie);
	mutex_init(&xnbp->xnb_rx_lock, NULL, MUTEX_DRIVER,
	    xnbp->xnb_icookie);
	mutex_init(&xnbp->xnb_state_lock, NULL, MUTEX_DRIVER,
	    xnbp->xnb_icookie);

	/* Set driver private pointer now. */
	ddi_set_driver_private(dip, xnbp);

	(void) sprintf(cachename, "xnb_tx_buf_cache_%d", ddi_get_instance(dip));
	xnbp->xnb_tx_buf_cache = kmem_cache_create(cachename,
	    sizeof (xnb_txbuf_t), 0,
	    xnb_txbuf_constructor, xnb_txbuf_destructor,
	    NULL, xnbp, NULL, 0);
	if (xnbp->xnb_tx_buf_cache == NULL)
		goto failure_0;

	if (!xnb_ks_init(xnbp))
		goto failure_1;

	/*
	 * Receive notification of changes in the state of the
	 * driver in the guest domain.
	 */
	if (xvdi_add_event_handler(dip, XS_OE_STATE, xnb_oe_state_change,
	    NULL) != DDI_SUCCESS)
		goto failure_2;

	/*
	 * Receive notification of hotplug events.
	 */
	if (xvdi_add_event_handler(dip, XS_HP_STATE, xnb_hp_state_change,
	    NULL) != DDI_SUCCESS)
		goto failure_2;

	xsname = xvdi_get_xsname(dip);

	if (xenbus_printf(XBT_NULL, xsname,
	    "feature-multicast-control", "%d",
	    xnb_multicast_control ? 1 : 0) != 0)
		goto failure_3;

	if (xenbus_printf(XBT_NULL, xsname,
	    "feature-rx-copy", "%d", 1) != 0)
		goto failure_3;
	/*
	 * Linux domUs seem to depend on "feature-rx-flip" being 0
	 * in addition to "feature-rx-copy" being 1.  It seems strange
	 * to use four possible states to describe a binary decision,
	 * but we might as well play nice.
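	 *
	 * "feature-rx-copy" advertises the hypervisor-copy receive
	 * path (xnb_copy_to_peer()); whether the peer actually uses
	 * it is read back as "request-rx-copy" in xnb_read_oe_config().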
	 */
	if (xenbus_printf(XBT_NULL, xsname,
	    "feature-rx-flip", "%d", 0) != 0)
		goto failure_3;

	(void) xvdi_switch_state(dip, XBT_NULL, XenbusStateInitWait);
	(void) xvdi_post_event(dip, XEN_HP_ADD);

	return (DDI_SUCCESS);

failure_3:
	xvdi_remove_event_handler(dip, NULL);

failure_2:
	xnb_ks_free(xnbp);

failure_1:
	kmem_cache_destroy(xnbp->xnb_tx_buf_cache);

failure_0:
	mutex_destroy(&xnbp->xnb_state_lock);
	mutex_destroy(&xnbp->xnb_rx_lock);
	mutex_destroy(&xnbp->xnb_tx_lock);

failure:
	vmem_free(heap_arena, xnbp->xnb_rx_va, PAGESIZE);
	kmem_free(xnbp, sizeof (*xnbp));
	return (DDI_FAILURE);
}

void
xnb_detach(dev_info_t *dip)
{
	xnb_t *xnbp = ddi_get_driver_private(dip);

	ASSERT(xnbp != NULL);
	ASSERT(!xnbp->xnb_connected);
	ASSERT(xnbp->xnb_tx_buf_count == 0);

	xnb_disconnect_rings(dip);

	xvdi_remove_event_handler(dip, NULL);

	xnb_ks_free(xnbp);

	kmem_cache_destroy(xnbp->xnb_tx_buf_cache);

	ddi_set_driver_private(dip, NULL);

	mutex_destroy(&xnbp->xnb_state_lock);
	mutex_destroy(&xnbp->xnb_rx_lock);
	mutex_destroy(&xnbp->xnb_tx_lock);

	if (xnbp->xnb_rx_cpop_count > 0)
		kmem_free(xnbp->xnb_rx_cpop, sizeof (xnbp->xnb_rx_cpop[0])
		    * xnbp->xnb_rx_cpop_count);

	ASSERT(xnbp->xnb_rx_va != NULL);
	vmem_free(heap_arena, xnbp->xnb_rx_va, PAGESIZE);

	kmem_free(xnbp, sizeof (*xnbp));
}

/*
 * Allocate a page from the hypervisor to be flipped to the peer.
 *
 * Try to get pages in batches to reduce the overhead of calls into
 * the balloon driver.
 */
static mfn_t
xnb_alloc_page(xnb_t *xnbp)
{
#define	WARNING_RATE_LIMIT 100
#define	BATCH_SIZE 256
	static mfn_t mfns[BATCH_SIZE];	/* common across all instances */
	static int nth = BATCH_SIZE;
	mfn_t mfn;

	mutex_enter(&xnb_alloc_page_lock);
	if (nth == BATCH_SIZE) {
		if (balloon_alloc_pages(BATCH_SIZE, mfns) != BATCH_SIZE) {
			xnbp->xnb_stat_allocation_failure++;
			mutex_exit(&xnb_alloc_page_lock);

			/*
			 * Try for a single page in low memory situations.
			 */
			if (balloon_alloc_pages(1, &mfn) != 1) {
				if ((xnbp->xnb_stat_small_allocation_failure++
				    % WARNING_RATE_LIMIT) == 0)
					cmn_err(CE_WARN, "xnb_alloc_page: "
					    "Cannot allocate memory to "
					    "transfer packets to peer.");
				return (0);
			} else {
				xnbp->xnb_stat_small_allocation_success++;
				return (mfn);
			}
		}

		nth = 0;
		xnbp->xnb_stat_allocation_success++;
	}

	mfn = mfns[nth++];
	mutex_exit(&xnb_alloc_page_lock);

	ASSERT(mfn != 0);

	return (mfn);
#undef BATCH_SIZE
#undef WARNING_RATE_LIMIT
}

/*
 * Free a page back to the hypervisor.
 *
 * This happens only in the error path, so batching is not worth the
 * complication.
 */
static void
xnb_free_page(xnb_t *xnbp, mfn_t mfn)
{
	_NOTE(ARGUNUSED(xnbp));
	int r;
	pfn_t pfn;

	pfn = xen_assign_pfn(mfn);
	pfnzero(pfn, 0, PAGESIZE);
	xen_release_pfn(pfn);

	if ((r = balloon_free_pages(1, &mfn, NULL, NULL)) != 1) {
		cmn_err(CE_WARN, "free_page: cannot decrease memory "
		    "reservation (%d): page kept but unusable (mfn = 0x%lx).",
		    r, mfn);
	}
}

/*
 * Similar to RING_HAS_UNCONSUMED_REQUESTS(&xnbp->rx_ring) but using
 * local variables.  Used in both xnb_to_peer() and xnb_copy_to_peer().
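 *
 * It evaluates to the smaller of:
 *  - the number of requests the peer has produced that we have not
 *    yet consumed ((_r)->sring->req_prod - loop), and
 *  - the number of free response slots remaining, given the responses
 *    produced so far (RING_SIZE(_r) - (loop - prod)),
 * so the loops that use it stop both when requests run out and when
 * there is no room left for responses.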
 */
#define	XNB_RING_HAS_UNCONSUMED_REQUESTS(_r)		\
	((((_r)->sring->req_prod - loop) <		\
		(RING_SIZE(_r) - (loop - prod))) ?	\
	    ((_r)->sring->req_prod - loop) :		\
	    (RING_SIZE(_r) - (loop - prod)))

/*
 * Pass packets to the peer using page flipping.
 */
mblk_t *
xnb_to_peer(xnb_t *xnbp, mblk_t *mp)
{
	mblk_t *free = mp, *prev = NULL;
	size_t len;
	gnttab_transfer_t *gop;
	boolean_t notify;
	RING_IDX loop, prod, end;

	/*
	 * For each packet the sequence of operations is:
	 *
	 * 1. get a new page from the hypervisor.
	 * 2. get a request slot from the ring.
	 * 3. copy the data into the new page.
	 * 4. transfer the page to the peer.
	 * 5. update the request slot.
	 * 6. kick the peer.
	 * 7. free mp.
	 *
	 * In order to reduce the number of hypercalls, we prepare
	 * several packets for the peer and perform a single hypercall
	 * to transfer them.
	 */

	len = 0;
	mutex_enter(&xnbp->xnb_rx_lock);

	/*
	 * If we are not connected to the peer or have not yet
	 * finished hotplug it is too early to pass packets to the
	 * peer.
	 */
	if (!(xnbp->xnb_connected && xnbp->xnb_hotplugged)) {
		mutex_exit(&xnbp->xnb_rx_lock);
		DTRACE_PROBE(flip_rx_too_early);
		xnbp->xnb_stat_rx_too_early++;
		return (mp);
	}

	loop = xnbp->xnb_rx_ring.req_cons;
	prod = xnbp->xnb_rx_ring.rsp_prod_pvt;
	gop = xnbp->xnb_rx_top;

	while ((mp != NULL) &&
	    XNB_RING_HAS_UNCONSUMED_REQUESTS(&xnbp->xnb_rx_ring)) {

		mfn_t mfn;
		pfn_t pfn;
		netif_rx_request_t *rxreq;
		netif_rx_response_t *rxresp;
		char *valoop;
		mblk_t *ml;
		uint16_t cksum_flags;

		/* 1 */
		if ((mfn = xnb_alloc_page(xnbp)) == 0) {
			xnbp->xnb_stat_rx_defer++;
			break;
		}

		/* 2 */
		rxreq = RING_GET_REQUEST(&xnbp->xnb_rx_ring, loop);

#ifdef XNB_DEBUG
		if (!(rxreq->id < NET_RX_RING_SIZE))
			cmn_err(CE_PANIC, "xnb_to_peer: "
			    "id %d out of range in request 0x%p",
			    rxreq->id, (void *)rxreq);
#endif /* XNB_DEBUG */

		/* Assign a pfn and map the new page at the allocated va. */
		pfn = xen_assign_pfn(mfn);
		hat_devload(kas.a_hat, xnbp->xnb_rx_va, PAGESIZE,
		    pfn, PROT_READ | PROT_WRITE, HAT_LOAD);

		/* 3 */
		len = 0;
		valoop = xnbp->xnb_rx_va;
		for (ml = mp; ml != NULL; ml = ml->b_cont) {
			size_t chunk = ml->b_wptr - ml->b_rptr;

			bcopy(ml->b_rptr, valoop, chunk);
			valoop += chunk;
			len += chunk;
		}

		ASSERT(len < PAGESIZE);

		/* Release the pfn. */
		hat_unload(kas.a_hat, xnbp->xnb_rx_va, PAGESIZE,
		    HAT_UNLOAD_UNMAP);
		xen_release_pfn(pfn);

		/* 4 */
		gop->mfn = mfn;
		gop->domid = xnbp->xnb_peer;
		gop->ref = rxreq->gref;

		/* 5.1 */
		rxresp = RING_GET_RESPONSE(&xnbp->xnb_rx_ring, prod);
		rxresp->offset = 0;
		rxresp->flags = 0;

		cksum_flags = xnbp->xnb_flavour->xf_cksum_to_peer(xnbp, mp);
		if (cksum_flags != 0)
			xnbp->xnb_stat_rx_cksum_deferred++;
		rxresp->flags |= cksum_flags;

		rxresp->id = RING_GET_REQUEST(&xnbp->xnb_rx_ring, prod)->id;
		rxresp->status = len;

		loop++;
		prod++;
		gop++;
		prev = mp;
		mp = mp->b_next;
	}

	/*
	 * Did we actually do anything?
	 */
	if (loop == xnbp->xnb_rx_ring.req_cons) {
		mutex_exit(&xnbp->xnb_rx_lock);
		return (mp);
	}

	end = loop;

	/*
	 * Unlink the end of the 'done' list from the remainder.
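	 *
	 * 'prev' is the last packet consumed by the loop above;
	 * clearing its b_next separates the packets we will transfer
	 * (still headed by 'free') from those handed back to the
	 * caller through 'mp'.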
	 */
	ASSERT(prev != NULL);
	prev->b_next = NULL;

	if (HYPERVISOR_grant_table_op(GNTTABOP_transfer, xnbp->xnb_rx_top,
	    loop - xnbp->xnb_rx_ring.req_cons) != 0) {
		cmn_err(CE_WARN, "xnb_to_peer: transfer operation failed");
	}

	loop = xnbp->xnb_rx_ring.req_cons;
	prod = xnbp->xnb_rx_ring.rsp_prod_pvt;
	gop = xnbp->xnb_rx_top;

	while (loop < end) {
		int16_t status = NETIF_RSP_OKAY;

		if (gop->status != 0) {
			status = NETIF_RSP_ERROR;

			/*
			 * If the status is anything other than
			 * GNTST_bad_page then we don't own the page
			 * any more, so don't try to give it back.
			 */
			if (gop->status != GNTST_bad_page)
				gop->mfn = 0;
		} else {
			/* The page is no longer ours. */
			gop->mfn = 0;
		}

		if (gop->mfn != 0)
			/*
			 * Give back the page, as we won't be using
			 * it.
			 */
			xnb_free_page(xnbp, gop->mfn);
		else
			/*
			 * We gave away a page, update our accounting
			 * now.
			 */
			balloon_drv_subtracted(1);

		/* 5.2 */
		if (status != NETIF_RSP_OKAY) {
			RING_GET_RESPONSE(&xnbp->xnb_rx_ring, prod)->status =
			    status;
		} else {
			xnbp->xnb_stat_ipackets++;
			xnbp->xnb_stat_rbytes += len;
		}

		loop++;
		prod++;
		gop++;
	}

	xnbp->xnb_rx_ring.req_cons = loop;
	xnbp->xnb_rx_ring.rsp_prod_pvt = prod;

	/* 6 */
	/* LINTED: constant in conditional context */
	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&xnbp->xnb_rx_ring, notify);
	if (notify) {
		ec_notify_via_evtchn(xnbp->xnb_evtchn);
		xnbp->xnb_stat_rx_notify_sent++;
	} else {
		xnbp->xnb_stat_rx_notify_deferred++;
	}

	if (mp != NULL)
		xnbp->xnb_stat_rx_defer++;

	mutex_exit(&xnbp->xnb_rx_lock);

	/* Free mblk_t's that we consumed. */
	freemsgchain(free);

	return (mp);
}

/* Helper functions for xnb_copy_to_peer(). */

/*
 * Grow the array of copy operation descriptors.
 */
static boolean_t
grow_cpop_area(xnb_t *xnbp)
{
	size_t count;
	gnttab_copy_t *new;

	ASSERT(MUTEX_HELD(&xnbp->xnb_rx_lock));

	count = xnbp->xnb_rx_cpop_count + CPOP_DEFCNT;

	if ((new = kmem_alloc(sizeof (new[0]) * count, KM_NOSLEEP)) == NULL) {
		xnbp->xnb_stat_other_allocation_failure++;
		return (B_FALSE);
	}

	bcopy(xnbp->xnb_rx_cpop, new,
	    sizeof (xnbp->xnb_rx_cpop[0]) * xnbp->xnb_rx_cpop_count);

	kmem_free(xnbp->xnb_rx_cpop,
	    sizeof (xnbp->xnb_rx_cpop[0]) * xnbp->xnb_rx_cpop_count);

	xnbp->xnb_rx_cpop = new;
	xnbp->xnb_rx_cpop_count = count;

	xnbp->xnb_stat_rx_cpoparea_grown++;

	return (B_TRUE);
}

/*
 * Check whether an address is on a page that's foreign to this domain.
 */
static boolean_t
is_foreign(void *addr)
{
	pfn_t pfn = hat_getpfnum(kas.a_hat, addr);

	return ((pfn & PFN_IS_FOREIGN_MFN) == PFN_IS_FOREIGN_MFN);
}

/*
 * Insert a newly allocated mblk into a chain, replacing the old one.
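 *
 * Used by xnb_copy_to_peer() when a data block sits on a foreign
 * page: copyb() duplicates the data into locally owned memory, and
 * the checksum attributes and chain linkage are carried across.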
 */
static mblk_t *
replace_msg(mblk_t *mp, size_t len, mblk_t *mp_prev, mblk_t *ml_prev)
{
	uint32_t start, stuff, end, value, flags;
	mblk_t *new_mp;

	new_mp = copyb(mp);
	if (new_mp == NULL) {
		cmn_err(CE_PANIC, "replace_msg: cannot alloc new message "
		    "for %p, len %lu", (void *) mp, len);
	}

	mac_hcksum_get(mp, &start, &stuff, &end, &value, &flags);
	mac_hcksum_set(new_mp, start, stuff, end, value, flags);

	new_mp->b_next = mp->b_next;
	new_mp->b_prev = mp->b_prev;
	new_mp->b_cont = mp->b_cont;

	/* Make sure we only overwrite pointers to the mblk being replaced. */
	if (mp_prev != NULL && mp_prev->b_next == mp)
		mp_prev->b_next = new_mp;

	if (ml_prev != NULL && ml_prev->b_cont == mp)
		ml_prev->b_cont = new_mp;

	mp->b_next = mp->b_prev = mp->b_cont = NULL;
	freemsg(mp);

	return (new_mp);
}

/*
 * Set all the fields in a gnttab_copy_t.
 */
static void
setup_gop(xnb_t *xnbp, gnttab_copy_t *gp, uchar_t *rptr,
    size_t s_off, size_t d_off, size_t len, grant_ref_t d_ref)
{
	ASSERT(xnbp != NULL && gp != NULL);

	gp->source.offset = s_off;
	gp->source.u.gmfn = pfn_to_mfn(hat_getpfnum(kas.a_hat, (caddr_t)rptr));
	gp->source.domid = DOMID_SELF;

	gp->len = (uint16_t)len;
	gp->flags = GNTCOPY_dest_gref;
	gp->status = 0;

	gp->dest.u.ref = d_ref;
	gp->dest.offset = d_off;
	gp->dest.domid = xnbp->xnb_peer;
}

/*
 * Pass packets to the peer using hypervisor copy operations.
 */
mblk_t *
xnb_copy_to_peer(xnb_t *xnbp, mblk_t *mp)
{
	mblk_t *free = mp, *mp_prev = NULL, *saved_mp = mp;
	mblk_t *ml, *ml_prev;
	boolean_t notify;
	RING_IDX loop, prod;
	int i;

	/*
	 * If the peer does not pre-post buffers for received packets,
	 * use page flipping to pass packets to it.
	 */
	if (!xnbp->xnb_rx_hv_copy)
		return (xnb_to_peer(xnbp, mp));

	/*
	 * For each packet the sequence of operations is:
	 *
	 * 1. get a request slot from the ring.
	 * 2. set up data for hypercall (see NOTE below)
	 * 3. have the hypervisor copy the data
	 * 4. update the request slot.
	 * 5. kick the peer.
	 *
	 * NOTE ad 2.
	 * In order to reduce the number of hypercalls, we prepare
	 * several mblks (mp->b_cont != NULL) for the peer and
	 * perform a single hypercall to transfer them.  We also have
	 * to set up a separate copy operation for every page.
	 *
	 * If we have more than one packet (mp->b_next != NULL), we do
	 * this whole dance repeatedly.
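	 *
	 * Concretely, the outer loop below walks the packet chain via
	 * b_next, the inner loop walks each packet's data blocks via
	 * b_cont, and setup_gop() fills in one gnttab_copy_t for each
	 * sub-page chunk of each block.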
	 */

	mutex_enter(&xnbp->xnb_rx_lock);

	if (!(xnbp->xnb_connected && xnbp->xnb_hotplugged)) {
		mutex_exit(&xnbp->xnb_rx_lock);
		DTRACE_PROBE(copy_rx_too_early);
		xnbp->xnb_stat_rx_too_early++;
		return (mp);
	}

	loop = xnbp->xnb_rx_ring.req_cons;
	prod = xnbp->xnb_rx_ring.rsp_prod_pvt;

	while ((mp != NULL) &&
	    XNB_RING_HAS_UNCONSUMED_REQUESTS(&xnbp->xnb_rx_ring)) {
		netif_rx_request_t *rxreq;
		size_t d_offset, len;
		int item_count;
		gnttab_copy_t *gop_cp;
		netif_rx_response_t *rxresp;
		uint16_t cksum_flags;
		int16_t status = NETIF_RSP_OKAY;

		/* 1 */
		rxreq = RING_GET_REQUEST(&xnbp->xnb_rx_ring, loop);

#ifdef XNB_DEBUG
		if (!(rxreq->id < NET_RX_RING_SIZE))
			cmn_err(CE_PANIC, "xnb_copy_to_peer: "
			    "id %d out of range in request 0x%p",
			    rxreq->id, (void *)rxreq);
#endif /* XNB_DEBUG */

		/* 2 */
		d_offset = 0;
		len = 0;
		item_count = 0;

		gop_cp = xnbp->xnb_rx_cpop;

		/*
		 * We walk the b_cont pointers and set up a
		 * gnttab_copy_t for each sub-page chunk in each data
		 * block.
		 */
		/* 2a */
		for (ml = mp, ml_prev = NULL; ml != NULL; ml = ml->b_cont) {
			size_t chunk = ml->b_wptr - ml->b_rptr;
			uchar_t *r_tmp, *rpt_align;
			size_t r_offset;

			/*
			 * The hypervisor will not allow us to
			 * reference a foreign page (e.g. one
			 * belonging to another domain) by mfn in the
			 * copy operation. If the data in this mblk is
			 * on such a page we must copy the data into a
			 * local page before initiating the hypervisor
			 * copy operation.
			 */
			if (is_foreign(ml->b_rptr) || is_foreign(ml->b_wptr)) {
				mblk_t *ml_new = replace_msg(ml, chunk,
				    mp_prev, ml_prev);

				/* We can still use old ml, but not *ml! */
				if (free == ml)
					free = ml_new;
				if (mp == ml)
					mp = ml_new;
				ml = ml_new;

				xnbp->xnb_stat_rx_foreign_page++;
			}

			rpt_align = (uchar_t *)ALIGN2PAGE(ml->b_rptr);
			r_offset = (uint16_t)(ml->b_rptr - rpt_align);
			r_tmp = ml->b_rptr;

			if (d_offset + chunk > PAGESIZE)
				cmn_err(CE_PANIC, "xnb_copy_to_peer: mp %p "
				    "(svd: %p), ml %p, rpt_alg. %p, d_offset "
				    "(%lu) + chunk (%lu) > PAGESIZE %d!",
				    (void *)mp, (void *)saved_mp, (void *)ml,
				    (void *)rpt_align,
				    d_offset, chunk, (int)PAGESIZE);

			while (chunk > 0) {
				size_t part_len;

				if (item_count == xnbp->xnb_rx_cpop_count) {
					if (!grow_cpop_area(xnbp))
						goto failure;
					gop_cp = &xnbp->xnb_rx_cpop[item_count];
				}
				/*
				 * If our mblk crosses a page boundary, we need
				 * to do a separate copy for each page.
				 */
				if (r_offset + chunk > PAGESIZE) {
					part_len = PAGESIZE - r_offset;

					DTRACE_PROBE3(mblk_page_crossed,
					    (mblk_t *), ml, int, chunk, int,
					    (int)r_offset);

					xnbp->xnb_stat_rx_pagebndry_crossed++;
				} else {
					part_len = chunk;
				}

				setup_gop(xnbp, gop_cp, r_tmp, r_offset,
				    d_offset, part_len, rxreq->gref);

				chunk -= part_len;

				len += part_len;
				d_offset += part_len;
				r_tmp += part_len;
				/*
				 * The 2nd, 3rd ... last copies will always
				 * start at r_tmp, therefore r_offset is 0.
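				 *
				 * Only the first chunk of a data block
				 * can begin part-way into a page; once
				 * we cross into the next page the copy
				 * starts at offset 0.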
				 */
				r_offset = 0;
				gop_cp++;
				item_count++;
			}
			ml_prev = ml;

			DTRACE_PROBE4(mblk_loop_end, (mblk_t *), ml, int,
			    chunk, int, len, int, item_count);
		}
		/* 3 */
		if (HYPERVISOR_grant_table_op(GNTTABOP_copy, xnbp->xnb_rx_cpop,
		    item_count) != 0) {
			cmn_err(CE_WARN, "xnb_copy_to_peer: copy op. failed");
			DTRACE_PROBE(HV_granttableopfailed);
		}

		/* 4 */
		rxresp = RING_GET_RESPONSE(&xnbp->xnb_rx_ring, prod);
		rxresp->offset = 0;

		rxresp->flags = 0;

		DTRACE_PROBE4(got_RX_rsp, int, (int)rxresp->id, int,
		    (int)rxresp->offset, int, (int)rxresp->flags, int,
		    (int)rxresp->status);

		cksum_flags = xnbp->xnb_flavour->xf_cksum_to_peer(xnbp, mp);
		if (cksum_flags != 0)
			xnbp->xnb_stat_rx_cksum_deferred++;
		rxresp->flags |= cksum_flags;

		rxresp->id = RING_GET_REQUEST(&xnbp->xnb_rx_ring, prod)->id;
		rxresp->status = len;

		DTRACE_PROBE4(RX_rsp_set, int, (int)rxresp->id, int,
		    (int)rxresp->offset, int, (int)rxresp->flags, int,
		    (int)rxresp->status);

		for (i = 0; i < item_count; i++) {
			if (xnbp->xnb_rx_cpop[i].status != 0) {
				DTRACE_PROBE2(cpop_status_nonnull, int,
				    (int)xnbp->xnb_rx_cpop[i].status,
				    int, i);
				status = NETIF_RSP_ERROR;
			}
		}

		/* 5.2 */
		if (status != NETIF_RSP_OKAY) {
			RING_GET_RESPONSE(&xnbp->xnb_rx_ring, prod)->status =
			    status;
			xnbp->xnb_stat_rx_rsp_notok++;
		} else {
			xnbp->xnb_stat_ipackets++;
			xnbp->xnb_stat_rbytes += len;
		}

		loop++;
		prod++;
		mp_prev = mp;
		mp = mp->b_next;
	}
failure:
	/*
	 * Did we actually do anything?
	 */
	if (loop == xnbp->xnb_rx_ring.req_cons) {
		mutex_exit(&xnbp->xnb_rx_lock);
		return (mp);
	}

	/*
	 * Unlink the end of the 'done' list from the remainder.
	 */
	ASSERT(mp_prev != NULL);
	mp_prev->b_next = NULL;

	xnbp->xnb_rx_ring.req_cons = loop;
	xnbp->xnb_rx_ring.rsp_prod_pvt = prod;

	/* 6 */
	/* LINTED: constant in conditional context */
	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&xnbp->xnb_rx_ring, notify);
	if (notify) {
		ec_notify_via_evtchn(xnbp->xnb_evtchn);
		xnbp->xnb_stat_rx_notify_sent++;
	} else {
		xnbp->xnb_stat_rx_notify_deferred++;
	}

	if (mp != NULL)
		xnbp->xnb_stat_rx_defer++;

	mutex_exit(&xnbp->xnb_rx_lock);

	/* Free mblk_t structs we have consumed. */
	freemsgchain(free);

	return (mp);
}

static void
xnb_tx_notify_peer(xnb_t *xnbp, boolean_t force)
{
	boolean_t notify;

	ASSERT(MUTEX_HELD(&xnbp->xnb_tx_lock));

	/* LINTED: constant in conditional context */
	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&xnbp->xnb_tx_ring, notify);
	if (notify || force) {
		ec_notify_via_evtchn(xnbp->xnb_evtchn);
		xnbp->xnb_stat_tx_notify_sent++;
	} else {
		xnbp->xnb_stat_tx_notify_deferred++;
	}
}

static void
xnb_tx_mark_complete(xnb_t *xnbp, RING_IDX id, int16_t status)
{
	RING_IDX i;
	netif_tx_response_t *txresp;

	ASSERT(MUTEX_HELD(&xnbp->xnb_tx_lock));

	i = xnbp->xnb_tx_ring.rsp_prod_pvt;

	txresp = RING_GET_RESPONSE(&xnbp->xnb_tx_ring, i);
	txresp->id = id;
	txresp->status = status;

	xnbp->xnb_tx_ring.rsp_prod_pvt = i + 1;

	/*
	 * Note that we don't push the change to the peer here - that
	 * is the caller's responsibility.
	 */
}

static void
xnb_txbuf_recycle(xnb_txbuf_t *txp)
{
	xnb_t *xnbp = txp->xt_xnbp;

	kmem_cache_free(xnbp->xnb_tx_buf_cache, txp);

	xnbp->xnb_tx_buf_outstanding--;
}

static int
xnb_txbuf_constructor(void *buf, void *arg, int kmflag)
{
	_NOTE(ARGUNUSED(kmflag));
	xnb_txbuf_t *txp = buf;
	xnb_t *xnbp = arg;
	size_t len;
	ddi_dma_cookie_t dma_cookie;
	uint_t ncookies;

	txp->xt_free_rtn.free_func = xnb_txbuf_recycle;
	txp->xt_free_rtn.free_arg = (caddr_t)txp;
	txp->xt_xnbp = xnbp;
	txp->xt_next = NULL;

	if (ddi_dma_alloc_handle(xnbp->xnb_devinfo, &buf_dma_attr,
	    0, 0, &txp->xt_dma_handle) != DDI_SUCCESS)
		goto failure;

	if (ddi_dma_mem_alloc(txp->xt_dma_handle, PAGESIZE, &data_accattr,
	    DDI_DMA_STREAMING, 0, 0, &txp->xt_buf, &len,
	    &txp->xt_acc_handle) != DDI_SUCCESS)
		goto failure_1;

	if (ddi_dma_addr_bind_handle(txp->xt_dma_handle, NULL, txp->xt_buf,
	    len, DDI_DMA_RDWR | DDI_DMA_STREAMING, DDI_DMA_DONTWAIT, 0,
	    &dma_cookie, &ncookies)
	    != DDI_DMA_MAPPED)
		goto failure_2;
	ASSERT(ncookies == 1);

	txp->xt_mfn = xnb_btop(dma_cookie.dmac_laddress);
	txp->xt_buflen = dma_cookie.dmac_size;

	DTRACE_PROBE(txbuf_allocated);

	atomic_inc_32(&xnbp->xnb_tx_buf_count);
	xnbp->xnb_tx_buf_outstanding++;

	return (0);

failure_2:
	ddi_dma_mem_free(&txp->xt_acc_handle);

failure_1:
	ddi_dma_free_handle(&txp->xt_dma_handle);

failure:

	return (-1);
}

static void
xnb_txbuf_destructor(void *buf, void *arg)
{
	xnb_txbuf_t *txp = buf;
	xnb_t *xnbp = arg;

	(void) ddi_dma_unbind_handle(txp->xt_dma_handle);
	ddi_dma_mem_free(&txp->xt_acc_handle);
	ddi_dma_free_handle(&txp->xt_dma_handle);

	atomic_dec_32(&xnbp->xnb_tx_buf_count);
}

/*
 * Take packets from the peer and deliver them onward.
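 *
 * The transmit ring is drained in batches: each request is validated
 * (flags, offset and size), the granted data is copied into locally
 * DMA-mapped buffers with a single GNTTABOP_copy hypercall per batch,
 * and the resulting mblk chain is returned for the flavour to deliver.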
 */
static mblk_t *
xnb_from_peer(xnb_t *xnbp)
{
	RING_IDX start, end, loop;
	gnttab_copy_t *cop;
	xnb_txbuf_t **txpp;
	netif_tx_request_t *txreq;
	boolean_t work_to_do, need_notify = B_FALSE;
	mblk_t *head, *tail;
	int n_data_req, i;

	ASSERT(MUTEX_HELD(&xnbp->xnb_tx_lock));

	head = tail = NULL;
around:

	/* LINTED: constant in conditional context */
	RING_FINAL_CHECK_FOR_REQUESTS(&xnbp->xnb_tx_ring, work_to_do);
	if (!work_to_do) {
finished:
		xnb_tx_notify_peer(xnbp, need_notify);

		return (head);
	}

	start = xnbp->xnb_tx_ring.req_cons;
	end = xnbp->xnb_tx_ring.sring->req_prod;

	if ((end - start) > NET_TX_RING_SIZE) {
		/*
		 * This usually indicates that the frontend driver is
		 * misbehaving, as it's not possible to have more than
		 * NET_TX_RING_SIZE ring elements in play at any one
		 * time.
		 *
		 * We reset the ring pointers to the state declared by
		 * the frontend and try to carry on.
		 */
		cmn_err(CE_WARN, "xnb_from_peer: domain %d tried to give us %u "
		    "items in the ring, resetting and trying to recover.",
		    xnbp->xnb_peer, (end - start));

		/* LINTED: constant in conditional context */
		BACK_RING_ATTACH(&xnbp->xnb_tx_ring,
		    (netif_tx_sring_t *)xnbp->xnb_tx_ring_addr, PAGESIZE);

		goto around;
	}

	loop = start;
	cop = xnbp->xnb_tx_cop;
	txpp = xnbp->xnb_tx_bufp;
	n_data_req = 0;

	while (loop < end) {
		static const uint16_t acceptable_flags =
		    NETTXF_csum_blank |
		    NETTXF_data_validated |
		    NETTXF_extra_info;
		uint16_t unexpected_flags;

		txreq = RING_GET_REQUEST(&xnbp->xnb_tx_ring, loop);

		unexpected_flags = txreq->flags & ~acceptable_flags;
		if (unexpected_flags != 0) {
			/*
			 * The peer used flag bits that we do not
			 * recognize.
			 */
			cmn_err(CE_WARN, "xnb_from_peer: "
			    "unexpected flag bits (0x%x) from peer "
			    "in transmit request",
			    unexpected_flags);
			xnbp->xnb_stat_tx_unexpected_flags++;

			/* Mark this entry as failed. */
			xnb_tx_mark_complete(xnbp, txreq->id, NETIF_RSP_ERROR);
			need_notify = B_TRUE;

		} else if (txreq->flags & NETTXF_extra_info) {
			struct netif_extra_info *erp;
			boolean_t status;

			loop++; /* Consume another slot in the ring. */
			ASSERT(loop <= end);

			erp = (struct netif_extra_info *)
			    RING_GET_REQUEST(&xnbp->xnb_tx_ring, loop);

			switch (erp->type) {
			case XEN_NETIF_EXTRA_TYPE_MCAST_ADD:
				ASSERT(xnbp->xnb_multicast_control);
				status = xnbp->xnb_flavour->xf_mcast_add(xnbp,
				    &erp->u.mcast.addr);
				break;
			case XEN_NETIF_EXTRA_TYPE_MCAST_DEL:
				ASSERT(xnbp->xnb_multicast_control);
				status = xnbp->xnb_flavour->xf_mcast_del(xnbp,
				    &erp->u.mcast.addr);
				break;
			default:
				status = B_FALSE;
				cmn_err(CE_WARN, "xnb_from_peer: "
				    "unknown extra type %d", erp->type);
				break;
			}

			xnb_tx_mark_complete(xnbp, txreq->id,
			    status ? NETIF_RSP_OKAY : NETIF_RSP_ERROR);
			need_notify = B_TRUE;

		} else if ((txreq->offset > PAGESIZE) ||
		    (txreq->offset + txreq->size > PAGESIZE)) {
			/*
			 * Peer attempted to refer to data beyond the
			 * end of the granted page.
			 */
			cmn_err(CE_WARN, "xnb_from_peer: "
			    "attempt to refer beyond the end of granted "
			    "page in txreq (offset %d, size %d).",
			    txreq->offset, txreq->size);
			xnbp->xnb_stat_tx_overflow_page++;

			/* Mark this entry as failed. */
			xnb_tx_mark_complete(xnbp, txreq->id, NETIF_RSP_ERROR);
			need_notify = B_TRUE;

		} else {
			xnb_txbuf_t *txp;

			txp = kmem_cache_alloc(xnbp->xnb_tx_buf_cache,
			    KM_NOSLEEP);
			if (txp == NULL)
				break;

			txp->xt_mblk = desballoc((unsigned char *)txp->xt_buf,
			    txp->xt_buflen, 0, &txp->xt_free_rtn);
			if (txp->xt_mblk == NULL) {
				kmem_cache_free(xnbp->xnb_tx_buf_cache, txp);
				break;
			}

			txp->xt_idx = loop;
			txp->xt_id = txreq->id;

			cop->source.u.ref = txreq->gref;
			cop->source.domid = xnbp->xnb_peer;
			cop->source.offset = txreq->offset;

			cop->dest.u.gmfn = txp->xt_mfn;
			cop->dest.domid = DOMID_SELF;
			cop->dest.offset = 0;

			cop->len = txreq->size;
			cop->flags = GNTCOPY_source_gref;
			cop->status = 0;

			*txpp = txp;

			txpp++;
			cop++;
			n_data_req++;

			ASSERT(n_data_req <= NET_TX_RING_SIZE);
		}

		loop++;
	}

	xnbp->xnb_tx_ring.req_cons = loop;

	if (n_data_req == 0)
		goto around;

	if (HYPERVISOR_grant_table_op(GNTTABOP_copy,
	    xnbp->xnb_tx_cop, n_data_req) != 0) {

		cmn_err(CE_WARN, "xnb_from_peer: copy operation failed");

		txpp = xnbp->xnb_tx_bufp;
		i = n_data_req;
		while (i > 0) {
			kmem_cache_free(xnbp->xnb_tx_buf_cache, *txpp);
			txpp++;
			i--;
		}

		goto finished;
	}

	txpp = xnbp->xnb_tx_bufp;
	cop = xnbp->xnb_tx_cop;
	i = n_data_req;

	while (i > 0) {
		xnb_txbuf_t *txp = *txpp;

		txreq = RING_GET_REQUEST(&xnbp->xnb_tx_ring, txp->xt_idx);

		if (cop->status != 0) {
#ifdef XNB_DEBUG
			cmn_err(CE_WARN, "xnb_from_peer: "
			    "txpp 0x%p failed (%d)",
			    (void *)*txpp, cop->status);
#endif /* XNB_DEBUG */
			xnb_tx_mark_complete(xnbp, txp->xt_id, NETIF_RSP_ERROR);
			freemsg(txp->xt_mblk);
		} else {
			mblk_t *mp;

			mp = txp->xt_mblk;
			mp->b_rptr = mp->b_wptr = (unsigned char *)txp->xt_buf;
			mp->b_wptr += txreq->size;
			mp->b_next = NULL;

			/*
			 * If there are checksum flags, process them
			 * appropriately.
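			 *
			 * NETTXF_csum_blank means the peer left the
			 * checksum for us to complete; NETTXF_data_validated
			 * means it asserts the data is already good.  The
			 * flavour hook translates these into the appropriate
			 * MAC checksum attributes.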
			 */
			if ((txreq->flags &
			    (NETTXF_csum_blank | NETTXF_data_validated))
			    != 0) {
				mp = xnbp->xnb_flavour->xf_cksum_from_peer(xnbp,
				    mp, txreq->flags);
				xnbp->xnb_stat_tx_cksum_no_need++;

				txp->xt_mblk = mp;
			}

			if (head == NULL) {
				ASSERT(tail == NULL);
				head = mp;
			} else {
				ASSERT(tail != NULL);
				tail->b_next = mp;
			}
			tail = mp;

			xnbp->xnb_stat_opackets++;
			xnbp->xnb_stat_obytes += txreq->size;

			xnb_tx_mark_complete(xnbp, txp->xt_id, NETIF_RSP_OKAY);
		}

		txpp++;
		cop++;
		i--;
	}

	goto around;
	/* NOTREACHED */
}

static uint_t
xnb_intr(caddr_t arg)
{
	xnb_t *xnbp = (xnb_t *)arg;
	mblk_t *mp;

	xnbp->xnb_stat_intr++;

	mutex_enter(&xnbp->xnb_tx_lock);

	ASSERT(xnbp->xnb_connected);

	mp = xnb_from_peer(xnbp);

	mutex_exit(&xnbp->xnb_tx_lock);

	if (!xnbp->xnb_hotplugged) {
		xnbp->xnb_stat_tx_too_early++;
		goto fail;
	}
	if (mp == NULL) {
		xnbp->xnb_stat_spurious_intr++;
		goto fail;
	}

	xnbp->xnb_flavour->xf_from_peer(xnbp, mp);

	return (DDI_INTR_CLAIMED);

fail:
	freemsgchain(mp);
	return (DDI_INTR_CLAIMED);
}

/*
 * Read our configuration from xenstore.
 */
boolean_t
xnb_read_xs_config(xnb_t *xnbp)
{
	char *xsname;
	char mac[ETHERADDRL * 3];

	xsname = xvdi_get_xsname(xnbp->xnb_devinfo);

	if (xenbus_scanf(XBT_NULL, xsname,
	    "mac", "%s", mac) != 0) {
		cmn_err(CE_WARN, "xnb_attach: "
		    "cannot read mac address from %s",
		    xsname);
		return (B_FALSE);
	}

	if (ether_aton(mac, xnbp->xnb_mac_addr) != ETHERADDRL) {
		cmn_err(CE_WARN,
		    "xnb_attach: cannot parse mac address %s",
		    mac);
		return (B_FALSE);
	}

	return (B_TRUE);
}

/*
 * Read the configuration of the peer from xenstore.
 */
boolean_t
xnb_read_oe_config(xnb_t *xnbp)
{
	char *oename;
	int i;

	oename = xvdi_get_oename(xnbp->xnb_devinfo);

	if (xenbus_gather(XBT_NULL, oename,
	    "event-channel", "%u", &xnbp->xnb_fe_evtchn,
	    "tx-ring-ref", "%lu", &xnbp->xnb_tx_ring_ref,
	    "rx-ring-ref", "%lu", &xnbp->xnb_rx_ring_ref,
	    NULL) != 0) {
		cmn_err(CE_WARN, "xnb_read_oe_config: "
		    "cannot read other-end details from %s",
		    oename);
		return (B_FALSE);
	}

	/*
	 * Check whether our peer requests receive side hypervisor
	 * copy.
	 */
	if (xenbus_scanf(XBT_NULL, oename,
	    "request-rx-copy", "%d", &i) != 0)
		i = 0;
	if (i != 0)
		xnbp->xnb_rx_hv_copy = B_TRUE;

	/*
	 * Check whether our peer requests multicast_control.
	 */
	if (xenbus_scanf(XBT_NULL, oename,
	    "request-multicast-control", "%d", &i) != 0)
		i = 0;
	if (i != 0)
		xnbp->xnb_multicast_control = B_TRUE;

	/*
	 * The Linux backend driver here checks to see if the peer has
	 * set 'feature-no-csum-offload'.  This is used to indicate
	 * that the guest cannot handle receiving packets without a
	 * valid checksum.  We don't check here, because packets passed
	 * to the peer _always_ have a valid checksum.
	 *
	 * There are three cases:
	 *
	 * - the NIC is dedicated: packets from the wire should always
	 *   have a valid checksum.  If the hardware validates the
	 *   checksum then the relevant bit will be set in the packet
	 *   attributes and we will inform the peer.  It can choose to
	 *   ignore the hardware verification.
	 *
	 * - the NIC is shared (VNIC) and a packet originates from the
	 *   wire: this is the same as the case above - the packets
	 *   will have a valid checksum.
	 *
	 * - the NIC is shared (VNIC) and a packet originates from the
	 *   host: the MAC layer ensures that all such packets have a
	 *   valid checksum by calculating one if the stack did not.
	 */

	return (B_TRUE);
}

void
xnb_start_connect(xnb_t *xnbp)
{
	dev_info_t *dip = xnbp->xnb_devinfo;

	if (!xnb_connect_rings(dip)) {
		cmn_err(CE_WARN, "xnb_start_connect: "
		    "cannot connect rings");
		goto failed;
	}

	if (!xnbp->xnb_flavour->xf_start_connect(xnbp)) {
		cmn_err(CE_WARN, "xnb_start_connect: "
		    "flavour failed to connect");
		goto failed;
	}

	(void) xvdi_switch_state(dip, XBT_NULL, XenbusStateConnected);
	return;

failed:
	xnbp->xnb_flavour->xf_peer_disconnected(xnbp);
	xnb_disconnect_rings(dip);
	(void) xvdi_switch_state(dip, XBT_NULL,
	    XenbusStateClosed);
	(void) xvdi_post_event(dip, XEN_HP_REMOVE);
}

static boolean_t
xnb_connect_rings(dev_info_t *dip)
{
	xnb_t *xnbp = ddi_get_driver_private(dip);
	struct gnttab_map_grant_ref map_op;

	/*
	 * Cannot attempt to connect the rings if already connected.
	 */
	ASSERT(!xnbp->xnb_connected);

	/*
	 * 1. allocate a vaddr for the tx page, one for the rx page.
	 * 2. call GNTTABOP_map_grant_ref to map the relevant pages
	 *    into the allocated vaddr (one for tx, one for rx).
	 * 3. call EVTCHNOP_bind_interdomain to have the event channel
	 *    bound to this domain.
	 * 4. associate the event channel with an interrupt.
	 * 5. enable the interrupt.
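	 *
	 * The grant references and event channel used in steps 2 and
	 * 3 are those read from xenstore by xnb_read_oe_config().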
	 */

	/* 1.tx */
	xnbp->xnb_tx_ring_addr = vmem_xalloc(heap_arena, PAGESIZE, PAGESIZE,
	    0, 0, 0, 0, VM_SLEEP);
	ASSERT(xnbp->xnb_tx_ring_addr != NULL);

	/* 2.tx */
	map_op.host_addr = (uint64_t)((long)xnbp->xnb_tx_ring_addr);
	map_op.flags = GNTMAP_host_map;
	map_op.ref = xnbp->xnb_tx_ring_ref;
	map_op.dom = xnbp->xnb_peer;
	hat_prepare_mapping(kas.a_hat, xnbp->xnb_tx_ring_addr, NULL);
	if (xen_map_gref(GNTTABOP_map_grant_ref, &map_op, 1, B_FALSE) != 0 ||
	    map_op.status != 0) {
		cmn_err(CE_WARN, "xnb_connect_rings: cannot map tx-ring page.");
		goto fail;
	}
	xnbp->xnb_tx_ring_handle = map_op.handle;

	/* LINTED: constant in conditional context */
	BACK_RING_INIT(&xnbp->xnb_tx_ring,
	    (netif_tx_sring_t *)xnbp->xnb_tx_ring_addr, PAGESIZE);

	/* 1.rx */
	xnbp->xnb_rx_ring_addr = vmem_xalloc(heap_arena, PAGESIZE, PAGESIZE,
	    0, 0, 0, 0, VM_SLEEP);
	ASSERT(xnbp->xnb_rx_ring_addr != NULL);

	/* 2.rx */
	map_op.host_addr = (uint64_t)((long)xnbp->xnb_rx_ring_addr);
	map_op.flags = GNTMAP_host_map;
	map_op.ref = xnbp->xnb_rx_ring_ref;
	map_op.dom = xnbp->xnb_peer;
	hat_prepare_mapping(kas.a_hat, xnbp->xnb_rx_ring_addr, NULL);
	if (xen_map_gref(GNTTABOP_map_grant_ref, &map_op, 1, B_FALSE) != 0 ||
	    map_op.status != 0) {
		cmn_err(CE_WARN, "xnb_connect_rings: cannot map rx-ring page.");
		goto fail;
	}
	xnbp->xnb_rx_ring_handle = map_op.handle;

	/* LINTED: constant in conditional context */
	BACK_RING_INIT(&xnbp->xnb_rx_ring,
	    (netif_rx_sring_t *)xnbp->xnb_rx_ring_addr, PAGESIZE);

	/* 3 */
	if (xvdi_bind_evtchn(dip, xnbp->xnb_fe_evtchn) != DDI_SUCCESS) {
		cmn_err(CE_WARN, "xnb_connect_rings: "
		    "cannot bind event channel %d", xnbp->xnb_evtchn);
		xnbp->xnb_evtchn = INVALID_EVTCHN;
		goto fail;
	}
	xnbp->xnb_evtchn = xvdi_get_evtchn(dip);

	/*
	 * It would be good to set the state to XenbusStateConnected
	 * here as well, but then what if ddi_add_intr() failed?
	 * Changing the state in the store will be noticed by the peer
	 * and cannot be "taken back".
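	 *
	 * Instead we only mark ourselves connected here (under both
	 * locks) and leave the switch to XenbusStateConnected to
	 * xnb_start_connect() once the interrupt is in place.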
	 */
	mutex_enter(&xnbp->xnb_tx_lock);
	mutex_enter(&xnbp->xnb_rx_lock);

	xnbp->xnb_connected = B_TRUE;

	mutex_exit(&xnbp->xnb_rx_lock);
	mutex_exit(&xnbp->xnb_tx_lock);

	/* 4, 5 */
	if (ddi_add_intr(dip, 0, NULL, NULL, xnb_intr, (caddr_t)xnbp)
	    != DDI_SUCCESS) {
		cmn_err(CE_WARN, "xnb_connect_rings: cannot add interrupt");
		goto fail;
	}
	xnbp->xnb_irq = B_TRUE;

	return (B_TRUE);

fail:
	mutex_enter(&xnbp->xnb_tx_lock);
	mutex_enter(&xnbp->xnb_rx_lock);

	xnbp->xnb_connected = B_FALSE;

	mutex_exit(&xnbp->xnb_rx_lock);
	mutex_exit(&xnbp->xnb_tx_lock);

	return (B_FALSE);
}

static void
xnb_disconnect_rings(dev_info_t *dip)
{
	xnb_t *xnbp = ddi_get_driver_private(dip);

	if (xnbp->xnb_irq) {
		ddi_remove_intr(dip, 0, NULL);
		xnbp->xnb_irq = B_FALSE;
	}

	if (xnbp->xnb_evtchn != INVALID_EVTCHN) {
		xvdi_free_evtchn(dip);
		xnbp->xnb_evtchn = INVALID_EVTCHN;
	}

	if (xnbp->xnb_rx_ring_handle != INVALID_GRANT_HANDLE) {
		struct gnttab_unmap_grant_ref unmap_op;

		unmap_op.host_addr = (uint64_t)(uintptr_t)
		    xnbp->xnb_rx_ring_addr;
		unmap_op.dev_bus_addr = 0;
		unmap_op.handle = xnbp->xnb_rx_ring_handle;
		if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
		    &unmap_op, 1) != 0)
			cmn_err(CE_WARN, "xnb_disconnect_rings: "
			    "cannot unmap rx-ring page (%d)",
			    unmap_op.status);

		xnbp->xnb_rx_ring_handle = INVALID_GRANT_HANDLE;
	}

	if (xnbp->xnb_rx_ring_addr != NULL) {
		hat_release_mapping(kas.a_hat, xnbp->xnb_rx_ring_addr);
		vmem_free(heap_arena, xnbp->xnb_rx_ring_addr, PAGESIZE);
		xnbp->xnb_rx_ring_addr = NULL;
	}

	if (xnbp->xnb_tx_ring_handle != INVALID_GRANT_HANDLE) {
		struct gnttab_unmap_grant_ref unmap_op;

		unmap_op.host_addr = (uint64_t)(uintptr_t)
		    xnbp->xnb_tx_ring_addr;
		unmap_op.dev_bus_addr = 0;
		unmap_op.handle = xnbp->xnb_tx_ring_handle;
		if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
		    &unmap_op, 1) != 0)
			cmn_err(CE_WARN, "xnb_disconnect_rings: "
			    "cannot unmap tx-ring page (%d)",
			    unmap_op.status);

		xnbp->xnb_tx_ring_handle = INVALID_GRANT_HANDLE;
	}

	if (xnbp->xnb_tx_ring_addr != NULL) {
		hat_release_mapping(kas.a_hat, xnbp->xnb_tx_ring_addr);
		vmem_free(heap_arena, xnbp->xnb_tx_ring_addr, PAGESIZE);
		xnbp->xnb_tx_ring_addr = NULL;
	}
}

static void
xnb_oe_state_change(dev_info_t *dip, ddi_eventcookie_t id,
    void *arg, void *impl_data)
{
	_NOTE(ARGUNUSED(id, arg));
	xnb_t *xnbp = ddi_get_driver_private(dip);
	XenbusState new_state = *(XenbusState *)impl_data;

	ASSERT(xnbp != NULL);

	switch (new_state) {
	case XenbusStateConnected:
		/* spurious state change */
		if (xnbp->xnb_connected)
			return;

		if (!xnb_read_oe_config(xnbp) ||
		    !xnbp->xnb_flavour->xf_peer_connected(xnbp)) {
			cmn_err(CE_WARN, "xnb_oe_state_change: "
			    "read otherend config error");
			(void) xvdi_switch_state(dip, XBT_NULL,
			    XenbusStateClosed);
			(void) xvdi_post_event(dip, XEN_HP_REMOVE);

			break;
		}

		mutex_enter(&xnbp->xnb_state_lock);
		xnbp->xnb_fe_status = XNB_STATE_READY;
		if (xnbp->xnb_be_status == XNB_STATE_READY)
			xnb_start_connect(xnbp);
		mutex_exit(&xnbp->xnb_state_lock);

		/*
		 * Now that we've attempted to connect it's reasonable
		 * to allow an attempt to detach.
		 */
		xnbp->xnb_detachable = B_TRUE;

		break;

	case XenbusStateClosing:
		(void) xvdi_switch_state(dip, XBT_NULL, XenbusStateClosing);

		break;

	case XenbusStateClosed:
		xnbp->xnb_flavour->xf_peer_disconnected(xnbp);

		mutex_enter(&xnbp->xnb_tx_lock);
		mutex_enter(&xnbp->xnb_rx_lock);

		xnb_disconnect_rings(dip);
		xnbp->xnb_connected = B_FALSE;

		mutex_exit(&xnbp->xnb_rx_lock);
		mutex_exit(&xnbp->xnb_tx_lock);

		(void) xvdi_switch_state(dip, XBT_NULL, XenbusStateClosed);
		(void) xvdi_post_event(dip, XEN_HP_REMOVE);
		/*
		 * In all likelihood this is already set (in the above
		 * case), but if the peer never attempted to connect
		 * and the domain is destroyed we get here without
		 * having been through the case above, so we set it to
		 * be sure.
		 */
		xnbp->xnb_detachable = B_TRUE;

		break;

	default:
		break;
	}
}

static void
xnb_hp_state_change(dev_info_t *dip, ddi_eventcookie_t id,
    void *arg, void *impl_data)
{
	_NOTE(ARGUNUSED(id, arg));
	xnb_t *xnbp = ddi_get_driver_private(dip);
	xendev_hotplug_state_t state = *(xendev_hotplug_state_t *)impl_data;

	ASSERT(xnbp != NULL);

	switch (state) {
	case Connected:
		/* spurious hotplug event */
		if (xnbp->xnb_hotplugged)
			break;

		if (!xnb_read_xs_config(xnbp))
			break;

		if (!xnbp->xnb_flavour->xf_hotplug_connected(xnbp))
			break;

		mutex_enter(&xnbp->xnb_tx_lock);
		mutex_enter(&xnbp->xnb_rx_lock);

		xnbp->xnb_hotplugged = B_TRUE;

		mutex_exit(&xnbp->xnb_rx_lock);
		mutex_exit(&xnbp->xnb_tx_lock);

		mutex_enter(&xnbp->xnb_state_lock);
		xnbp->xnb_be_status = XNB_STATE_READY;
		if (xnbp->xnb_fe_status == XNB_STATE_READY)
			xnb_start_connect(xnbp);
		mutex_exit(&xnbp->xnb_state_lock);

		break;

	default:
		break;
	}
}

static struct modldrv modldrv = {
	&mod_miscops, "xnb",
};

static struct modlinkage modlinkage = {
	MODREV_1, &modldrv, NULL
};

int
_init(void)
{
	int i;

	mutex_init(&xnb_alloc_page_lock, NULL, MUTEX_DRIVER, NULL);

	i = mod_install(&modlinkage);
	if (i != DDI_SUCCESS)
		mutex_destroy(&xnb_alloc_page_lock);

	return (i);
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}

int
_fini(void)
{
	int i;

	i = mod_remove(&modlinkage);
	if (i == DDI_SUCCESS)
		mutex_destroy(&xnb_alloc_page_lock);

	return (i);
}