1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #ifdef DEBUG 28 #define XNB_DEBUG 1 29 #endif /* DEBUG */ 30 31 #include "xnb.h" 32 33 #include <sys/sunddi.h> 34 #include <sys/sunndi.h> 35 #include <sys/modctl.h> 36 #include <sys/conf.h> 37 #include <sys/mac.h> 38 #include <sys/dlpi.h> 39 #include <sys/strsubr.h> 40 #include <sys/strsun.h> 41 #include <sys/types.h> 42 #include <sys/pattr.h> 43 #include <vm/seg_kmem.h> 44 #include <vm/hat_i86.h> 45 #include <xen/sys/xenbus_impl.h> 46 #include <xen/sys/xendev.h> 47 #include <sys/balloon_impl.h> 48 #include <sys/evtchn_impl.h> 49 #include <sys/gnttab.h> 50 #include <vm/vm_dep.h> 51 52 #include <sys/gld.h> 53 #include <inet/ip.h> 54 #include <inet/ip_impl.h> 55 #include <sys/vnic_impl.h> /* blech. */ 56 57 /* 58 * The terms "transmit" and "receive" are used in their traditional 59 * sense here - packets from other parts of this system are 60 * "transmitted" to the peer domain and those originating from the 61 * peer are "received". 62 * 63 * In some cases this can be confusing, because various data 64 * structures are shared with the domU driver, which has the opposite 65 * view of what constitutes "transmit" and "receive". In naming the 66 * shared structures the domU driver always wins. 67 */ 68 69 /* 70 * XXPV dme: things to do, as well as various things indicated 71 * throughout the source: 72 * - copy avoidance outbound. 73 * - copy avoidance inbound. 74 * - transfer credit limiting. 75 * - MAC address based filtering. 76 */ 77 78 /* 79 * Linux expects to have some headroom in received buffers. The Linux 80 * frontend driver (netfront) checks to see if the headroom is 81 * available and will re-allocate the buffer to make room if 82 * necessary. To avoid this we add TX_BUFFER_HEADROOM bytes of 83 * headroom to each packet we pass to the peer. 84 */ 85 #define TX_BUFFER_HEADROOM 16 86 87 static boolean_t xnb_cksum_offload = B_TRUE; 88 89 static boolean_t xnb_connect_rings(dev_info_t *); 90 static void xnb_disconnect_rings(dev_info_t *); 91 static void xnb_oe_state_change(dev_info_t *, ddi_eventcookie_t, 92 void *, void *); 93 static void xnb_hp_state_change(dev_info_t *, ddi_eventcookie_t, 94 void *, void *); 95 96 static int xnb_rxbuf_constructor(void *, void *, int); 97 static void xnb_rxbuf_destructor(void *, void *); 98 static xnb_rxbuf_t *xnb_rxbuf_get(xnb_t *, int); 99 static void xnb_rxbuf_put(xnb_t *, xnb_rxbuf_t *); 100 static void xnb_rx_notify_peer(xnb_t *); 101 static void xnb_rx_complete(xnb_rxbuf_t *); 102 static void xnb_rx_mark_complete(xnb_t *, RING_IDX, int16_t); 103 static void xnb_rx_schedule_unmop(xnb_t *, gnttab_map_grant_ref_t *, 104 xnb_rxbuf_t *); 105 static void xnb_rx_perform_pending_unmop(xnb_t *); 106 mblk_t *xnb_copy_to_peer(xnb_t *, mblk_t *); 107 108 int xnb_unmop_lowwat = NET_TX_RING_SIZE >> 2; 109 int xnb_unmop_hiwat = NET_TX_RING_SIZE - (NET_TX_RING_SIZE >> 2); 110 111 112 boolean_t xnb_hv_copy = B_TRUE; 113 boolean_t xnb_explicit_pageflip_set = B_FALSE; 114 115 /* XXPV dme: are these really invalid? */ 116 #define INVALID_GRANT_HANDLE ((grant_handle_t)-1) 117 #define INVALID_GRANT_REF ((grant_ref_t)-1) 118 119 static kmem_cache_t *xnb_rxbuf_cachep; 120 static kmutex_t xnb_alloc_page_lock; 121 122 /* 123 * Statistics. 124 */ 125 static char *aux_statistics[] = { 126 "tx_cksum_deferred", 127 "rx_cksum_no_need", 128 "tx_rsp_notok", 129 "tx_notify_deferred", 130 "tx_notify_sent", 131 "rx_notify_deferred", 132 "rx_notify_sent", 133 "tx_too_early", 134 "rx_too_early", 135 "rx_allocb_failed", 136 "tx_allocb_failed", 137 "tx_foreign_page", 138 "mac_full", 139 "spurious_intr", 140 "allocation_success", 141 "allocation_failure", 142 "small_allocation_success", 143 "small_allocation_failure", 144 "other_allocation_failure", 145 "tx_pageboundary_crossed", 146 "tx_cpoparea_grown", 147 "csum_hardware", 148 "csum_software", 149 }; 150 151 static int 152 xnb_ks_aux_update(kstat_t *ksp, int flag) 153 { 154 xnb_t *xnbp; 155 kstat_named_t *knp; 156 157 if (flag != KSTAT_READ) 158 return (EACCES); 159 160 xnbp = ksp->ks_private; 161 knp = ksp->ks_data; 162 163 /* 164 * Assignment order should match that of the names in 165 * aux_statistics. 166 */ 167 (knp++)->value.ui64 = xnbp->xnb_stat_tx_cksum_deferred; 168 (knp++)->value.ui64 = xnbp->xnb_stat_rx_cksum_no_need; 169 (knp++)->value.ui64 = xnbp->xnb_stat_tx_rsp_notok; 170 (knp++)->value.ui64 = xnbp->xnb_stat_tx_notify_deferred; 171 (knp++)->value.ui64 = xnbp->xnb_stat_tx_notify_sent; 172 (knp++)->value.ui64 = xnbp->xnb_stat_rx_notify_deferred; 173 (knp++)->value.ui64 = xnbp->xnb_stat_rx_notify_sent; 174 (knp++)->value.ui64 = xnbp->xnb_stat_tx_too_early; 175 (knp++)->value.ui64 = xnbp->xnb_stat_rx_too_early; 176 (knp++)->value.ui64 = xnbp->xnb_stat_rx_allocb_failed; 177 (knp++)->value.ui64 = xnbp->xnb_stat_tx_allocb_failed; 178 (knp++)->value.ui64 = xnbp->xnb_stat_tx_foreign_page; 179 (knp++)->value.ui64 = xnbp->xnb_stat_mac_full; 180 (knp++)->value.ui64 = xnbp->xnb_stat_spurious_intr; 181 (knp++)->value.ui64 = xnbp->xnb_stat_allocation_success; 182 (knp++)->value.ui64 = xnbp->xnb_stat_allocation_failure; 183 (knp++)->value.ui64 = xnbp->xnb_stat_small_allocation_success; 184 (knp++)->value.ui64 = xnbp->xnb_stat_small_allocation_failure; 185 (knp++)->value.ui64 = xnbp->xnb_stat_other_allocation_failure; 186 (knp++)->value.ui64 = xnbp->xnb_stat_tx_pagebndry_crossed; 187 (knp++)->value.ui64 = xnbp->xnb_stat_tx_cpoparea_grown; 188 (knp++)->value.ui64 = xnbp->xnb_stat_csum_hardware; 189 (knp++)->value.ui64 = xnbp->xnb_stat_csum_software; 190 191 return (0); 192 } 193 194 static boolean_t 195 xnb_ks_init(xnb_t *xnbp) 196 { 197 int nstat = sizeof (aux_statistics) / 198 sizeof (aux_statistics[0]); 199 char **cp = aux_statistics; 200 kstat_named_t *knp; 201 202 /* 203 * Create and initialise kstats. 204 */ 205 xnbp->xnb_kstat_aux = kstat_create(ddi_driver_name(xnbp->xnb_devinfo), 206 ddi_get_instance(xnbp->xnb_devinfo), "aux_statistics", "net", 207 KSTAT_TYPE_NAMED, nstat, 0); 208 if (xnbp->xnb_kstat_aux == NULL) 209 return (B_FALSE); 210 211 xnbp->xnb_kstat_aux->ks_private = xnbp; 212 xnbp->xnb_kstat_aux->ks_update = xnb_ks_aux_update; 213 214 knp = xnbp->xnb_kstat_aux->ks_data; 215 while (nstat > 0) { 216 kstat_named_init(knp, *cp, KSTAT_DATA_UINT64); 217 218 knp++; 219 cp++; 220 nstat--; 221 } 222 223 kstat_install(xnbp->xnb_kstat_aux); 224 225 return (B_TRUE); 226 } 227 228 static void 229 xnb_ks_free(xnb_t *xnbp) 230 { 231 kstat_delete(xnbp->xnb_kstat_aux); 232 } 233 234 /* 235 * Software checksum calculation and insertion for an arbitrary packet. 236 */ 237 /*ARGSUSED*/ 238 static mblk_t * 239 xnb_software_csum(xnb_t *xnbp, mblk_t *mp) 240 { 241 /* 242 * XXPV dme: shouldn't rely on vnic_fix_cksum(), not least 243 * because it doesn't cover all of the interesting cases :-( 244 */ 245 (void) hcksum_assoc(mp, NULL, NULL, 0, 0, 0, 0, 246 HCK_FULLCKSUM, KM_NOSLEEP); 247 248 return (vnic_fix_cksum(mp)); 249 } 250 251 mblk_t * 252 xnb_process_cksum_flags(xnb_t *xnbp, mblk_t *mp, uint32_t capab) 253 { 254 struct ether_header *ehp; 255 uint16_t sap; 256 uint32_t offset; 257 ipha_t *ipha; 258 259 ASSERT(mp->b_next == NULL); 260 261 /* 262 * Check that the packet is contained in a single mblk. In 263 * the "from peer" path this is true today, but will change 264 * when scatter gather support is added. In the "to peer" 265 * path we cannot be sure, but in most cases it will be true 266 * (in the xnbo case the packet has come from a MAC device 267 * which is unlikely to split packets). 268 */ 269 if (mp->b_cont != NULL) 270 goto software; 271 272 /* 273 * If the MAC has no hardware capability don't do any further 274 * checking. 275 */ 276 if (capab == 0) 277 goto software; 278 279 ASSERT(MBLKL(mp) >= sizeof (struct ether_header)); 280 ehp = (struct ether_header *)mp->b_rptr; 281 282 if (ntohs(ehp->ether_type) == VLAN_TPID) { 283 struct ether_vlan_header *evhp; 284 285 ASSERT(MBLKL(mp) >= sizeof (struct ether_vlan_header)); 286 evhp = (struct ether_vlan_header *)mp->b_rptr; 287 sap = ntohs(evhp->ether_type); 288 offset = sizeof (struct ether_vlan_header); 289 } else { 290 sap = ntohs(ehp->ether_type); 291 offset = sizeof (struct ether_header); 292 } 293 294 /* 295 * We only attempt to do IPv4 packets in hardware. 296 */ 297 if (sap != ETHERTYPE_IP) 298 goto software; 299 300 /* 301 * We know that this is an IPv4 packet. 302 */ 303 ipha = (ipha_t *)(mp->b_rptr + offset); 304 305 switch (ipha->ipha_protocol) { 306 case IPPROTO_TCP: 307 case IPPROTO_UDP: { 308 uint32_t start, length, stuff, cksum; 309 uint16_t *stuffp; 310 311 /* 312 * This is a TCP/IPv4 or UDP/IPv4 packet, for which we 313 * can use full IPv4 and partial checksum offload. 314 */ 315 if ((capab & (HCKSUM_INET_FULL_V4|HCKSUM_INET_PARTIAL)) == 0) 316 break; 317 318 start = IP_SIMPLE_HDR_LENGTH; 319 length = ntohs(ipha->ipha_length); 320 if (ipha->ipha_protocol == IPPROTO_TCP) { 321 stuff = start + TCP_CHECKSUM_OFFSET; 322 cksum = IP_TCP_CSUM_COMP; 323 } else { 324 stuff = start + UDP_CHECKSUM_OFFSET; 325 cksum = IP_UDP_CSUM_COMP; 326 } 327 stuffp = (uint16_t *)(mp->b_rptr + offset + stuff); 328 329 if (capab & HCKSUM_INET_FULL_V4) { 330 /* 331 * Some devices require that the checksum 332 * field of the packet is zero for full 333 * offload. 334 */ 335 *stuffp = 0; 336 337 (void) hcksum_assoc(mp, NULL, NULL, 338 0, 0, 0, 0, 339 HCK_FULLCKSUM, KM_NOSLEEP); 340 341 xnbp->xnb_stat_csum_hardware++; 342 343 return (mp); 344 } 345 346 if (capab & HCKSUM_INET_PARTIAL) { 347 if (*stuffp == 0) { 348 ipaddr_t src, dst; 349 350 /* 351 * Older Solaris guests don't insert 352 * the pseudo-header checksum, so we 353 * calculate it here. 354 */ 355 src = ipha->ipha_src; 356 dst = ipha->ipha_dst; 357 358 cksum += (dst >> 16) + (dst & 0xFFFF); 359 cksum += (src >> 16) + (src & 0xFFFF); 360 cksum += length - IP_SIMPLE_HDR_LENGTH; 361 362 cksum = (cksum >> 16) + (cksum & 0xFFFF); 363 cksum = (cksum >> 16) + (cksum & 0xFFFF); 364 365 ASSERT(cksum <= 0xFFFF); 366 367 *stuffp = (uint16_t)(cksum ? cksum : ~cksum); 368 } 369 370 (void) hcksum_assoc(mp, NULL, NULL, 371 start, stuff, length, 0, 372 HCK_PARTIALCKSUM, KM_NOSLEEP); 373 374 xnbp->xnb_stat_csum_hardware++; 375 376 return (mp); 377 } 378 379 /* NOTREACHED */ 380 break; 381 } 382 383 default: 384 /* Use software. */ 385 break; 386 } 387 388 software: 389 /* 390 * We are not able to use any offload so do the whole thing in 391 * software. 392 */ 393 xnbp->xnb_stat_csum_software++; 394 395 return (xnb_software_csum(xnbp, mp)); 396 } 397 398 int 399 xnb_attach(dev_info_t *dip, xnb_flavour_t *flavour, void *flavour_data) 400 { 401 xnb_t *xnbp; 402 char *xsname, mac[ETHERADDRL * 3]; 403 404 xnbp = kmem_zalloc(sizeof (*xnbp), KM_SLEEP); 405 406 xnbp->xnb_flavour = flavour; 407 xnbp->xnb_flavour_data = flavour_data; 408 xnbp->xnb_devinfo = dip; 409 xnbp->xnb_evtchn = INVALID_EVTCHN; 410 xnbp->xnb_irq = B_FALSE; 411 xnbp->xnb_tx_ring_handle = INVALID_GRANT_HANDLE; 412 xnbp->xnb_rx_ring_handle = INVALID_GRANT_HANDLE; 413 xnbp->xnb_cksum_offload = xnb_cksum_offload; 414 xnbp->xnb_connected = B_FALSE; 415 xnbp->xnb_hotplugged = B_FALSE; 416 xnbp->xnb_detachable = B_FALSE; 417 xnbp->xnb_peer = xvdi_get_oeid(dip); 418 xnbp->xnb_rx_pages_writable = B_FALSE; 419 420 xnbp->xnb_rx_buf_count = 0; 421 xnbp->xnb_rx_unmop_count = 0; 422 423 xnbp->xnb_hv_copy = B_FALSE; 424 425 xnbp->xnb_tx_va = vmem_alloc(heap_arena, PAGESIZE, VM_SLEEP); 426 ASSERT(xnbp->xnb_tx_va != NULL); 427 428 if (ddi_get_iblock_cookie(dip, 0, &xnbp->xnb_icookie) 429 != DDI_SUCCESS) 430 goto failure; 431 432 /* allocated on demand, when/if we enter xnb_copy_to_peer() */ 433 xnbp->xnb_tx_cpop = NULL; 434 xnbp->xnb_cpop_sz = 0; 435 436 mutex_init(&xnbp->xnb_tx_lock, NULL, MUTEX_DRIVER, 437 xnbp->xnb_icookie); 438 mutex_init(&xnbp->xnb_rx_lock, NULL, MUTEX_DRIVER, 439 xnbp->xnb_icookie); 440 441 /* set driver private pointer now */ 442 ddi_set_driver_private(dip, xnbp); 443 444 if (!xnb_ks_init(xnbp)) 445 goto failure_1; 446 447 /* 448 * Receive notification of changes in the state of the 449 * driver in the guest domain. 450 */ 451 if (xvdi_add_event_handler(dip, XS_OE_STATE, 452 xnb_oe_state_change) != DDI_SUCCESS) 453 goto failure_2; 454 455 /* 456 * Receive notification of hotplug events. 457 */ 458 if (xvdi_add_event_handler(dip, XS_HP_STATE, 459 xnb_hp_state_change) != DDI_SUCCESS) 460 goto failure_2; 461 462 xsname = xvdi_get_xsname(dip); 463 464 if (xenbus_printf(XBT_NULL, xsname, 465 "feature-no-csum-offload", "%d", 466 xnbp->xnb_cksum_offload ? 0 : 1) != 0) 467 goto failure_3; 468 469 /* 470 * Use global xnb_hv_copy to export this feature. This means that 471 * we have to decide what to do before starting up a guest domain 472 */ 473 if (xenbus_printf(XBT_NULL, xsname, 474 "feature-rx-copy", "%d", xnb_hv_copy ? 1 : 0) != 0) 475 goto failure_3; 476 /* 477 * Linux domUs seem to depend on "feature-rx-flip" being 0 478 * in addition to "feature-rx-copy" being 1. It seems strange 479 * to use four possible states to describe a binary decision, 480 * but we might as well play nice. 481 */ 482 if (xenbus_printf(XBT_NULL, xsname, 483 "feature-rx-flip", "%d", xnb_explicit_pageflip_set ? 1 : 0) != 0) 484 goto failure_3; 485 486 if (xenbus_scanf(XBT_NULL, xsname, 487 "mac", "%s", mac) != 0) { 488 cmn_err(CE_WARN, "xnb_attach: " 489 "cannot read mac address from %s", 490 xsname); 491 goto failure_3; 492 } 493 494 if (ether_aton(mac, xnbp->xnb_mac_addr) != ETHERADDRL) { 495 cmn_err(CE_WARN, 496 "xnb_attach: cannot parse mac address %s", 497 mac); 498 goto failure_3; 499 } 500 501 (void) xvdi_switch_state(dip, XBT_NULL, XenbusStateInitWait); 502 (void) xvdi_post_event(dip, XEN_HP_ADD); 503 504 return (DDI_SUCCESS); 505 506 failure_3: 507 xvdi_remove_event_handler(dip, NULL); 508 509 failure_2: 510 xnb_ks_free(xnbp); 511 512 failure_1: 513 mutex_destroy(&xnbp->xnb_rx_lock); 514 mutex_destroy(&xnbp->xnb_tx_lock); 515 516 failure: 517 vmem_free(heap_arena, xnbp->xnb_tx_va, PAGESIZE); 518 kmem_free(xnbp, sizeof (*xnbp)); 519 return (DDI_FAILURE); 520 } 521 522 /*ARGSUSED*/ 523 void 524 xnb_detach(dev_info_t *dip) 525 { 526 xnb_t *xnbp = ddi_get_driver_private(dip); 527 528 ASSERT(xnbp != NULL); 529 ASSERT(!xnbp->xnb_connected); 530 ASSERT(xnbp->xnb_rx_buf_count == 0); 531 532 xnb_disconnect_rings(dip); 533 534 xvdi_remove_event_handler(dip, NULL); 535 536 xnb_ks_free(xnbp); 537 538 ddi_set_driver_private(dip, NULL); 539 540 mutex_destroy(&xnbp->xnb_tx_lock); 541 mutex_destroy(&xnbp->xnb_rx_lock); 542 543 if (xnbp->xnb_cpop_sz > 0) 544 kmem_free(xnbp->xnb_tx_cpop, sizeof (*xnbp->xnb_tx_cpop) 545 * xnbp->xnb_cpop_sz); 546 547 ASSERT(xnbp->xnb_tx_va != NULL); 548 vmem_free(heap_arena, xnbp->xnb_tx_va, PAGESIZE); 549 550 kmem_free(xnbp, sizeof (*xnbp)); 551 } 552 553 554 static mfn_t 555 xnb_alloc_page(xnb_t *xnbp) 556 { 557 #define WARNING_RATE_LIMIT 100 558 #define BATCH_SIZE 256 559 static mfn_t mfns[BATCH_SIZE]; /* common across all instances */ 560 static int nth = BATCH_SIZE; 561 mfn_t mfn; 562 563 mutex_enter(&xnb_alloc_page_lock); 564 if (nth == BATCH_SIZE) { 565 if (balloon_alloc_pages(BATCH_SIZE, mfns) != BATCH_SIZE) { 566 xnbp->xnb_stat_allocation_failure++; 567 mutex_exit(&xnb_alloc_page_lock); 568 569 /* 570 * Try for a single page in low memory situations. 571 */ 572 if (balloon_alloc_pages(1, &mfn) != 1) { 573 if ((xnbp->xnb_stat_small_allocation_failure++ 574 % WARNING_RATE_LIMIT) == 0) 575 cmn_err(CE_WARN, "xnb_alloc_page: " 576 "Cannot allocate memory to " 577 "transfer packets to peer."); 578 return (0); 579 } else { 580 xnbp->xnb_stat_small_allocation_success++; 581 return (mfn); 582 } 583 } 584 585 nth = 0; 586 xnbp->xnb_stat_allocation_success++; 587 } 588 589 mfn = mfns[nth++]; 590 mutex_exit(&xnb_alloc_page_lock); 591 592 ASSERT(mfn != 0); 593 594 return (mfn); 595 #undef BATCH_SIZE 596 #undef WARNING_RATE_LIMIT 597 } 598 599 /*ARGSUSED*/ 600 static void 601 xnb_free_page(xnb_t *xnbp, mfn_t mfn) 602 { 603 int r; 604 pfn_t pfn; 605 606 pfn = xen_assign_pfn(mfn); 607 pfnzero(pfn, 0, PAGESIZE); 608 xen_release_pfn(pfn); 609 610 /* 611 * This happens only in the error path, so batching is 612 * not worth the complication. 613 */ 614 if ((r = balloon_free_pages(1, &mfn, NULL, NULL)) != 1) { 615 cmn_err(CE_WARN, "free_page: cannot decrease memory " 616 "reservation (%d): page kept but unusable (mfn = 0x%lx).", 617 r, mfn); 618 } 619 } 620 621 /* 622 * Similar to RING_HAS_UNCONSUMED_REQUESTS(&xnbp->rx_ring) but 623 * using local variables. 624 */ 625 #define XNB_RING_HAS_UNCONSUMED_REQUESTS(_r) \ 626 ((((_r)->sring->req_prod - loop) < \ 627 (RING_SIZE(_r) - (loop - prod))) ? \ 628 ((_r)->sring->req_prod - loop) : \ 629 (RING_SIZE(_r) - (loop - prod))) 630 631 mblk_t * 632 xnb_to_peer(xnb_t *xnbp, mblk_t *mp) 633 { 634 mblk_t *free = mp, *prev = NULL; 635 size_t len; 636 gnttab_transfer_t *gop; 637 boolean_t notify; 638 RING_IDX loop, prod, end; 639 640 /* 641 * For each packet the sequence of operations is: 642 * 643 * 1. get a new page from the hypervisor. 644 * 2. get a request slot from the ring. 645 * 3. copy the data into the new page. 646 * 4. transfer the page to the peer. 647 * 5. update the request slot. 648 * 6. kick the peer. 649 * 7. free mp. 650 * 651 * In order to reduce the number of hypercalls, we prepare 652 * several packets for the peer and perform a single hypercall 653 * to transfer them. 654 */ 655 656 mutex_enter(&xnbp->xnb_tx_lock); 657 658 /* 659 * If we are not connected to the peer or have not yet 660 * finished hotplug it is too early to pass packets to the 661 * peer. 662 */ 663 if (!(xnbp->xnb_connected && xnbp->xnb_hotplugged)) { 664 mutex_exit(&xnbp->xnb_tx_lock); 665 DTRACE_PROBE(flip_tx_too_early); 666 xnbp->xnb_stat_tx_too_early++; 667 return (mp); 668 } 669 670 loop = xnbp->xnb_rx_ring.req_cons; 671 prod = xnbp->xnb_rx_ring.rsp_prod_pvt; 672 gop = xnbp->xnb_tx_top; 673 674 while ((mp != NULL) && 675 XNB_RING_HAS_UNCONSUMED_REQUESTS(&xnbp->xnb_rx_ring)) { 676 677 mfn_t mfn; 678 pfn_t pfn; 679 netif_rx_request_t *rxreq; 680 netif_rx_response_t *rxresp; 681 char *valoop; 682 size_t offset; 683 mblk_t *ml; 684 uint16_t cksum_flags; 685 686 /* 1 */ 687 if ((mfn = xnb_alloc_page(xnbp)) == 0) { 688 xnbp->xnb_stat_xmit_defer++; 689 break; 690 } 691 692 /* 2 */ 693 rxreq = RING_GET_REQUEST(&xnbp->xnb_rx_ring, loop); 694 695 #ifdef XNB_DEBUG 696 if (!(rxreq->id < NET_RX_RING_SIZE)) 697 cmn_err(CE_PANIC, "xnb_to_peer: " 698 "id %d out of range in request 0x%p", 699 rxreq->id, (void *)rxreq); 700 #endif /* XNB_DEBUG */ 701 702 /* Assign a pfn and map the new page at the allocated va. */ 703 pfn = xen_assign_pfn(mfn); 704 hat_devload(kas.a_hat, xnbp->xnb_tx_va, PAGESIZE, 705 pfn, PROT_READ | PROT_WRITE, HAT_LOAD); 706 707 offset = TX_BUFFER_HEADROOM; 708 709 /* 3 */ 710 len = 0; 711 valoop = xnbp->xnb_tx_va + offset; 712 for (ml = mp; ml != NULL; ml = ml->b_cont) { 713 size_t chunk = ml->b_wptr - ml->b_rptr; 714 715 bcopy(ml->b_rptr, valoop, chunk); 716 valoop += chunk; 717 len += chunk; 718 } 719 720 ASSERT(len + offset < PAGESIZE); 721 722 /* Release the pfn. */ 723 hat_unload(kas.a_hat, xnbp->xnb_tx_va, PAGESIZE, 724 HAT_UNLOAD_UNMAP); 725 xen_release_pfn(pfn); 726 727 /* 4 */ 728 gop->mfn = mfn; 729 gop->domid = xnbp->xnb_peer; 730 gop->ref = rxreq->gref; 731 732 /* 5.1 */ 733 rxresp = RING_GET_RESPONSE(&xnbp->xnb_rx_ring, prod); 734 rxresp->offset = offset; 735 rxresp->flags = 0; 736 737 cksum_flags = xnbp->xnb_flavour->xf_cksum_to_peer(xnbp, mp); 738 if (cksum_flags != 0) 739 xnbp->xnb_stat_tx_cksum_deferred++; 740 rxresp->flags |= cksum_flags; 741 742 rxresp->id = RING_GET_REQUEST(&xnbp->xnb_rx_ring, prod)->id; 743 rxresp->status = len; 744 745 loop++; 746 prod++; 747 gop++; 748 prev = mp; 749 mp = mp->b_next; 750 } 751 752 /* 753 * Did we actually do anything? 754 */ 755 if (loop == xnbp->xnb_rx_ring.req_cons) { 756 mutex_exit(&xnbp->xnb_tx_lock); 757 return (mp); 758 } 759 760 end = loop; 761 762 /* 763 * Unlink the end of the 'done' list from the remainder. 764 */ 765 ASSERT(prev != NULL); 766 prev->b_next = NULL; 767 768 if (HYPERVISOR_grant_table_op(GNTTABOP_transfer, xnbp->xnb_tx_top, 769 loop - xnbp->xnb_rx_ring.req_cons) != 0) { 770 cmn_err(CE_WARN, "xnb_to_peer: transfer operation failed"); 771 } 772 773 loop = xnbp->xnb_rx_ring.req_cons; 774 prod = xnbp->xnb_rx_ring.rsp_prod_pvt; 775 gop = xnbp->xnb_tx_top; 776 777 while (loop < end) { 778 int16_t status = NETIF_RSP_OKAY; 779 780 if (gop->status != 0) { 781 status = NETIF_RSP_ERROR; 782 783 /* 784 * If the status is anything other than 785 * GNTST_bad_page then we don't own the page 786 * any more, so don't try to give it back. 787 */ 788 if (gop->status != GNTST_bad_page) 789 gop->mfn = 0; 790 } else { 791 /* The page is no longer ours. */ 792 gop->mfn = 0; 793 } 794 795 if (gop->mfn != 0) 796 /* 797 * Give back the page, as we won't be using 798 * it. 799 */ 800 xnb_free_page(xnbp, gop->mfn); 801 else 802 /* 803 * We gave away a page, update our accounting 804 * now. 805 */ 806 balloon_drv_subtracted(1); 807 808 /* 5.2 */ 809 if (status != NETIF_RSP_OKAY) { 810 RING_GET_RESPONSE(&xnbp->xnb_rx_ring, prod)->status = 811 status; 812 } else { 813 xnbp->xnb_stat_opackets++; 814 xnbp->xnb_stat_obytes += len; 815 } 816 817 loop++; 818 prod++; 819 gop++; 820 } 821 822 xnbp->xnb_rx_ring.req_cons = loop; 823 xnbp->xnb_rx_ring.rsp_prod_pvt = prod; 824 825 /* 6 */ 826 /* LINTED: constant in conditional context */ 827 RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&xnbp->xnb_rx_ring, notify); 828 if (notify) { 829 ec_notify_via_evtchn(xnbp->xnb_evtchn); 830 xnbp->xnb_stat_tx_notify_sent++; 831 } else { 832 xnbp->xnb_stat_tx_notify_deferred++; 833 } 834 835 if (mp != NULL) 836 xnbp->xnb_stat_xmit_defer++; 837 838 mutex_exit(&xnbp->xnb_tx_lock); 839 840 /* Free mblk_t's that we consumed. */ 841 freemsgchain(free); 842 843 return (mp); 844 } 845 846 /* helper functions for xnb_copy_to_peer */ 847 848 /* 849 * Grow the array of copy operation descriptors. 850 * Returns a pointer to the next available entry. 851 */ 852 gnttab_copy_t * 853 grow_cpop_area(xnb_t *xnbp, gnttab_copy_t *o_cpop) 854 { 855 /* 856 * o_cpop (arg.1) is a ptr to the area we would like to copy 857 * something into but cannot, because we haven't alloc'ed it 858 * yet, or NULL. 859 * old_cpop and new_cpop (local) are pointers to old/new 860 * versions of xnbp->xnb_tx_cpop. 861 */ 862 gnttab_copy_t *new_cpop, *old_cpop, *ret_cpop; 863 size_t newcount; 864 865 ASSERT(MUTEX_HELD(&xnbp->xnb_tx_lock)); 866 867 old_cpop = xnbp->xnb_tx_cpop; 868 /* 869 * o_cpop is a pointer into the array pointed to by old_cpop; 870 * it would be an error for exactly one of these pointers to be NULL. 871 * We shouldn't call this function if xnb_tx_cpop has already 872 * been allocated, but we're starting to fill it from the beginning 873 * again. 874 */ 875 ASSERT((o_cpop == NULL && old_cpop == NULL) || 876 (o_cpop != NULL && old_cpop != NULL && o_cpop != old_cpop)); 877 878 newcount = xnbp->xnb_cpop_sz + CPOP_DEFCNT; 879 880 new_cpop = kmem_alloc(sizeof (*new_cpop) * newcount, KM_NOSLEEP); 881 if (new_cpop == NULL) { 882 xnbp->xnb_stat_other_allocation_failure++; 883 return (NULL); 884 } 885 886 if (o_cpop != NULL) { 887 size_t offset = (o_cpop - old_cpop); 888 889 /* we only need to move the parts in use ... */ 890 (void) memmove(new_cpop, old_cpop, xnbp->xnb_cpop_sz * 891 (sizeof (*old_cpop))); 892 893 kmem_free(old_cpop, xnbp->xnb_cpop_sz * sizeof (*old_cpop)); 894 895 ret_cpop = new_cpop + offset; 896 } else { 897 ret_cpop = new_cpop; 898 } 899 900 xnbp->xnb_tx_cpop = new_cpop; 901 xnbp->xnb_cpop_sz = newcount; 902 903 xnbp->xnb_stat_tx_cpoparea_grown++; 904 905 return (ret_cpop); 906 } 907 908 /* 909 * Check whether an address is on a page that's foreign to this domain. 910 */ 911 static boolean_t 912 is_foreign(void *addr) 913 { 914 pfn_t pfn = hat_getpfnum(kas.a_hat, addr); 915 916 return (pfn & PFN_IS_FOREIGN_MFN ? B_TRUE : B_FALSE); 917 } 918 919 /* 920 * Insert a newly allocated mblk into a chain, replacing the old one. 921 */ 922 static mblk_t * 923 replace_msg(mblk_t *mp, size_t len, mblk_t *mp_prev, mblk_t *ml_prev) 924 { 925 uint32_t start, stuff, end, value, flags; 926 mblk_t *new_mp; 927 928 new_mp = copyb(mp); 929 if (new_mp == NULL) 930 cmn_err(CE_PANIC, "replace_msg: cannot alloc new message" 931 "for %p, len %lu", (void *) mp, len); 932 933 hcksum_retrieve(mp, NULL, NULL, &start, &stuff, &end, &value, &flags); 934 (void) hcksum_assoc(new_mp, NULL, NULL, start, stuff, end, value, 935 flags, KM_NOSLEEP); 936 937 new_mp->b_next = mp->b_next; 938 new_mp->b_prev = mp->b_prev; 939 new_mp->b_cont = mp->b_cont; 940 941 /* Make sure we only overwrite pointers to the mblk being replaced. */ 942 if (mp_prev != NULL && mp_prev->b_next == mp) 943 mp_prev->b_next = new_mp; 944 945 if (ml_prev != NULL && ml_prev->b_cont == mp) 946 ml_prev->b_cont = new_mp; 947 948 mp->b_next = mp->b_prev = mp->b_cont = NULL; 949 freemsg(mp); 950 951 return (new_mp); 952 } 953 954 /* 955 * Set all the fields in a gnttab_copy_t. 956 */ 957 static void 958 setup_gop(xnb_t *xnbp, gnttab_copy_t *gp, uchar_t *rptr, 959 size_t s_off, size_t d_off, size_t len, grant_ref_t d_ref) 960 { 961 ASSERT(xnbp != NULL && gp != NULL); 962 963 gp->source.offset = s_off; 964 gp->source.u.gmfn = pfn_to_mfn(hat_getpfnum(kas.a_hat, (caddr_t)rptr)); 965 gp->source.domid = DOMID_SELF; 966 967 gp->len = (uint16_t)len; 968 gp->flags = GNTCOPY_dest_gref; 969 gp->status = 0; 970 971 gp->dest.u.ref = d_ref; 972 gp->dest.offset = d_off; 973 gp->dest.domid = xnbp->xnb_peer; 974 } 975 976 mblk_t * 977 xnb_copy_to_peer(xnb_t *xnbp, mblk_t *mp) 978 { 979 mblk_t *free = mp, *mp_prev = NULL, *saved_mp = mp; 980 mblk_t *ml, *ml_prev; 981 gnttab_copy_t *gop_cp; 982 boolean_t notify; 983 RING_IDX loop, prod; 984 int i; 985 986 if (!xnbp->xnb_hv_copy) 987 return (xnb_to_peer(xnbp, mp)); 988 989 /* 990 * For each packet the sequence of operations is: 991 * 992 * 1. get a request slot from the ring. 993 * 2. set up data for hypercall (see NOTE below) 994 * 3. have the hypervisore copy the data 995 * 4. update the request slot. 996 * 5. kick the peer. 997 * 998 * NOTE ad 2. 999 * In order to reduce the number of hypercalls, we prepare 1000 * several packets (mp->b_cont != NULL) for the peer and 1001 * perform a single hypercall to transfer them. 1002 * We also have to set up a seperate copy operation for 1003 * every page. 1004 * 1005 * If we have more than one message (mp->b_next != NULL), 1006 * we do this whole dance repeatedly. 1007 */ 1008 1009 mutex_enter(&xnbp->xnb_tx_lock); 1010 1011 if (!(xnbp->xnb_connected && xnbp->xnb_hotplugged)) { 1012 mutex_exit(&xnbp->xnb_tx_lock); 1013 DTRACE_PROBE(copy_tx_too_early); 1014 xnbp->xnb_stat_tx_too_early++; 1015 return (mp); 1016 } 1017 1018 loop = xnbp->xnb_rx_ring.req_cons; 1019 prod = xnbp->xnb_rx_ring.rsp_prod_pvt; 1020 1021 while ((mp != NULL) && 1022 XNB_RING_HAS_UNCONSUMED_REQUESTS(&xnbp->xnb_rx_ring)) { 1023 netif_rx_request_t *rxreq; 1024 netif_rx_response_t *rxresp; 1025 size_t offset, d_offset; 1026 size_t len; 1027 uint16_t cksum_flags; 1028 int16_t status = NETIF_RSP_OKAY; 1029 int item_count; 1030 1031 /* 1 */ 1032 rxreq = RING_GET_REQUEST(&xnbp->xnb_rx_ring, loop); 1033 1034 #ifdef XNB_DEBUG 1035 if (!(rxreq->id < NET_RX_RING_SIZE)) 1036 cmn_err(CE_PANIC, "xnb_copy_to_peer: " 1037 "id %d out of range in request 0x%p", 1038 rxreq->id, (void *)rxreq); 1039 #endif /* XNB_DEBUG */ 1040 1041 /* 2 */ 1042 d_offset = offset = TX_BUFFER_HEADROOM; 1043 len = 0; 1044 item_count = 0; 1045 1046 gop_cp = xnbp->xnb_tx_cpop; 1047 1048 /* 1049 * We walk the b_cont pointers and set up a gop_cp 1050 * structure for every page in every data block we have. 1051 */ 1052 /* 2a */ 1053 for (ml = mp, ml_prev = NULL; ml != NULL; ml = ml->b_cont) { 1054 size_t chunk = ml->b_wptr - ml->b_rptr; 1055 uchar_t *r_tmp, *rpt_align; 1056 size_t r_offset; 1057 1058 /* 1059 * If we get an mblk on a page that doesn't belong to 1060 * this domain, get a new mblk to replace the old one. 1061 */ 1062 if (is_foreign(ml->b_rptr) || is_foreign(ml->b_wptr)) { 1063 mblk_t *ml_new = replace_msg(ml, chunk, 1064 mp_prev, ml_prev); 1065 1066 /* We can still use old ml, but not *ml! */ 1067 if (free == ml) 1068 free = ml_new; 1069 if (mp == ml) 1070 mp = ml_new; 1071 ml = ml_new; 1072 1073 xnbp->xnb_stat_tx_foreign_page++; 1074 } 1075 1076 rpt_align = (uchar_t *)ALIGN2PAGE(ml->b_rptr); 1077 r_offset = (uint16_t)(ml->b_rptr - rpt_align); 1078 r_tmp = ml->b_rptr; 1079 1080 if (d_offset + chunk > PAGESIZE) 1081 cmn_err(CE_PANIC, "xnb_copy_to_peer: mp %p " 1082 "(svd: %p), ml %p,rpt_alg. %p, d_offset " 1083 "(%lu) + chunk (%lu) > PAGESIZE %d!", 1084 (void *)mp, (void *)saved_mp, (void *)ml, 1085 (void *)rpt_align, 1086 d_offset, chunk, (int)PAGESIZE); 1087 1088 while (chunk > 0) { 1089 size_t part_len; 1090 1091 item_count++; 1092 if (item_count > xnbp->xnb_cpop_sz) { 1093 gop_cp = grow_cpop_area(xnbp, gop_cp); 1094 if (gop_cp == NULL) 1095 goto failure; 1096 } 1097 /* 1098 * If our mblk crosses a page boundary, we need 1099 * to do a seperate copy for every page. 1100 */ 1101 if (r_offset + chunk > PAGESIZE) { 1102 part_len = PAGESIZE - r_offset; 1103 1104 DTRACE_PROBE3(mblk_page_crossed, 1105 (mblk_t *), ml, int, chunk, int, 1106 (int)r_offset); 1107 1108 xnbp->xnb_stat_tx_pagebndry_crossed++; 1109 } else { 1110 part_len = chunk; 1111 } 1112 1113 setup_gop(xnbp, gop_cp, r_tmp, r_offset, 1114 d_offset, part_len, rxreq->gref); 1115 1116 chunk -= part_len; 1117 1118 len += part_len; 1119 d_offset += part_len; 1120 r_tmp += part_len; 1121 /* 1122 * The 2nd, 3rd ... last copies will always 1123 * start at r_tmp, therefore r_offset is 0. 1124 */ 1125 r_offset = 0; 1126 gop_cp++; 1127 } 1128 ml_prev = ml; 1129 DTRACE_PROBE4(mblk_loop_end, (mblk_t *), ml, int, 1130 chunk, int, len, int, item_count); 1131 } 1132 /* 3 */ 1133 if (HYPERVISOR_grant_table_op(GNTTABOP_copy, xnbp->xnb_tx_cpop, 1134 item_count) != 0) { 1135 cmn_err(CE_WARN, "xnb_copy_to_peer: copy op. failed"); 1136 DTRACE_PROBE(HV_granttableopfailed); 1137 } 1138 1139 /* 4 */ 1140 rxresp = RING_GET_RESPONSE(&xnbp->xnb_rx_ring, prod); 1141 rxresp->offset = offset; 1142 1143 rxresp->flags = 0; 1144 1145 DTRACE_PROBE4(got_RX_rsp, int, (int)rxresp->id, int, 1146 (int)rxresp->offset, int, (int)rxresp->flags, int, 1147 (int)rxresp->status); 1148 1149 cksum_flags = xnbp->xnb_flavour->xf_cksum_to_peer(xnbp, mp); 1150 if (cksum_flags != 0) 1151 xnbp->xnb_stat_tx_cksum_deferred++; 1152 rxresp->flags |= cksum_flags; 1153 1154 rxresp->id = RING_GET_REQUEST(&xnbp->xnb_rx_ring, prod)->id; 1155 rxresp->status = len; 1156 1157 DTRACE_PROBE4(RX_rsp_set, int, (int)rxresp->id, int, 1158 (int)rxresp->offset, int, (int)rxresp->flags, int, 1159 (int)rxresp->status); 1160 1161 for (i = 0; i < item_count; i++) { 1162 if (xnbp->xnb_tx_cpop[i].status != 0) { 1163 DTRACE_PROBE2(cpop__status__nonnull, int, 1164 (int)xnbp->xnb_tx_cpop[i].status, 1165 int, i); 1166 status = NETIF_RSP_ERROR; 1167 } 1168 } 1169 1170 /* 5.2 */ 1171 if (status != NETIF_RSP_OKAY) { 1172 RING_GET_RESPONSE(&xnbp->xnb_rx_ring, prod)->status = 1173 status; 1174 xnbp->xnb_stat_tx_rsp_notok++; 1175 } else { 1176 xnbp->xnb_stat_opackets++; 1177 xnbp->xnb_stat_obytes += len; 1178 } 1179 1180 loop++; 1181 prod++; 1182 mp_prev = mp; 1183 mp = mp->b_next; 1184 } 1185 failure: 1186 /* 1187 * Did we actually do anything? 1188 */ 1189 if (loop == xnbp->xnb_rx_ring.req_cons) { 1190 mutex_exit(&xnbp->xnb_tx_lock); 1191 return (mp); 1192 } 1193 1194 /* 1195 * Unlink the end of the 'done' list from the remainder. 1196 */ 1197 ASSERT(mp_prev != NULL); 1198 mp_prev->b_next = NULL; 1199 1200 xnbp->xnb_rx_ring.req_cons = loop; 1201 xnbp->xnb_rx_ring.rsp_prod_pvt = prod; 1202 1203 /* 6 */ 1204 /* LINTED: constant in conditional context */ 1205 RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&xnbp->xnb_rx_ring, notify); 1206 if (notify) { 1207 ec_notify_via_evtchn(xnbp->xnb_evtchn); 1208 xnbp->xnb_stat_tx_notify_sent++; 1209 } else { 1210 xnbp->xnb_stat_tx_notify_deferred++; 1211 } 1212 1213 if (mp != NULL) 1214 xnbp->xnb_stat_xmit_defer++; 1215 1216 mutex_exit(&xnbp->xnb_tx_lock); 1217 1218 /* Free mblk_t structs we have consumed. */ 1219 freemsgchain(free); 1220 1221 return (mp); 1222 } 1223 1224 /*ARGSUSED*/ 1225 static int 1226 xnb_rxbuf_constructor(void *buf, void *arg, int kmflag) 1227 { 1228 xnb_rxbuf_t *rxp = buf; 1229 1230 bzero(rxp, sizeof (*rxp)); 1231 1232 rxp->xr_free_rtn.free_func = xnb_rx_complete; 1233 rxp->xr_free_rtn.free_arg = (caddr_t)rxp; 1234 1235 rxp->xr_mop.host_addr = 1236 (uint64_t)(uintptr_t)vmem_alloc(heap_arena, PAGESIZE, 1237 ((kmflag & KM_NOSLEEP) == KM_NOSLEEP) ? 1238 VM_NOSLEEP : VM_SLEEP); 1239 1240 if (rxp->xr_mop.host_addr == NULL) { 1241 cmn_err(CE_WARN, "xnb_rxbuf_constructor: " 1242 "cannot get address space"); 1243 return (-1); 1244 } 1245 1246 /* 1247 * Have the hat ensure that page table exists for the VA. 1248 */ 1249 hat_prepare_mapping(kas.a_hat, 1250 (caddr_t)(uintptr_t)rxp->xr_mop.host_addr); 1251 1252 return (0); 1253 } 1254 1255 /*ARGSUSED*/ 1256 static void 1257 xnb_rxbuf_destructor(void *buf, void *arg) 1258 { 1259 xnb_rxbuf_t *rxp = buf; 1260 1261 ASSERT(rxp->xr_mop.host_addr != NULL); 1262 ASSERT((rxp->xr_flags & XNB_RXBUF_INUSE) == 0); 1263 1264 hat_release_mapping(kas.a_hat, 1265 (caddr_t)(uintptr_t)rxp->xr_mop.host_addr); 1266 vmem_free(heap_arena, 1267 (caddr_t)(uintptr_t)rxp->xr_mop.host_addr, PAGESIZE); 1268 } 1269 1270 static void 1271 xnb_rx_notify_peer(xnb_t *xnbp) 1272 { 1273 boolean_t notify; 1274 1275 ASSERT(MUTEX_HELD(&xnbp->xnb_rx_lock)); 1276 1277 /* LINTED: constant in conditional context */ 1278 RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&xnbp->xnb_tx_ring, notify); 1279 if (notify) { 1280 ec_notify_via_evtchn(xnbp->xnb_evtchn); 1281 xnbp->xnb_stat_rx_notify_sent++; 1282 } else { 1283 xnbp->xnb_stat_rx_notify_deferred++; 1284 } 1285 } 1286 1287 static void 1288 xnb_rx_complete(xnb_rxbuf_t *rxp) 1289 { 1290 xnb_t *xnbp = rxp->xr_xnbp; 1291 1292 ASSERT((rxp->xr_flags & XNB_RXBUF_INUSE) == XNB_RXBUF_INUSE); 1293 1294 mutex_enter(&xnbp->xnb_rx_lock); 1295 xnb_rx_schedule_unmop(xnbp, &rxp->xr_mop, rxp); 1296 mutex_exit(&xnbp->xnb_rx_lock); 1297 } 1298 1299 static void 1300 xnb_rx_mark_complete(xnb_t *xnbp, RING_IDX id, int16_t status) 1301 { 1302 RING_IDX i; 1303 netif_tx_response_t *txresp; 1304 1305 ASSERT(MUTEX_HELD(&xnbp->xnb_rx_lock)); 1306 1307 i = xnbp->xnb_tx_ring.rsp_prod_pvt; 1308 1309 txresp = RING_GET_RESPONSE(&xnbp->xnb_tx_ring, i); 1310 txresp->id = id; 1311 txresp->status = status; 1312 1313 xnbp->xnb_tx_ring.rsp_prod_pvt = i + 1; 1314 1315 /* 1316 * Note that we don't push the change to the peer here - that 1317 * is the callers responsibility. 1318 */ 1319 } 1320 1321 static void 1322 xnb_rx_schedule_unmop(xnb_t *xnbp, gnttab_map_grant_ref_t *mop, 1323 xnb_rxbuf_t *rxp) 1324 { 1325 gnttab_unmap_grant_ref_t *unmop; 1326 int u_count; 1327 int reqs_on_ring; 1328 1329 ASSERT(MUTEX_HELD(&xnbp->xnb_rx_lock)); 1330 ASSERT(xnbp->xnb_rx_unmop_count < NET_TX_RING_SIZE); 1331 1332 u_count = xnbp->xnb_rx_unmop_count++; 1333 1334 /* Cache data for the time when we actually unmap grant refs */ 1335 xnbp->xnb_rx_unmop_rxp[u_count] = rxp; 1336 1337 unmop = &xnbp->xnb_rx_unmop[u_count]; 1338 unmop->host_addr = mop->host_addr; 1339 unmop->dev_bus_addr = mop->dev_bus_addr; 1340 unmop->handle = mop->handle; 1341 1342 /* 1343 * We cannot check the ring once we're disconnected from it. Batching 1344 * doesn't seem to be a useful optimisation in this case either, 1345 * so we directly call into the actual unmap function. 1346 */ 1347 if (xnbp->xnb_connected) { 1348 reqs_on_ring = RING_HAS_UNCONSUMED_REQUESTS(&xnbp->xnb_tx_ring); 1349 1350 /* 1351 * By tuning xnb_unmop_hiwat to N, we can emulate "N per batch" 1352 * or (with N == 1) "immediate unmop" behaviour. 1353 * The "> xnb_unmop_lowwat" is a guard against ring exhaustion. 1354 */ 1355 if (xnbp->xnb_rx_unmop_count < xnb_unmop_hiwat && 1356 reqs_on_ring > xnb_unmop_lowwat) 1357 return; 1358 } 1359 1360 xnb_rx_perform_pending_unmop(xnbp); 1361 } 1362 1363 /* 1364 * Here we perform the actual unmapping of the data that was 1365 * accumulated in xnb_rx_schedule_unmop(). 1366 * Note that it is the caller's responsibility to make sure that 1367 * there's actually something there to unmop. 1368 */ 1369 static void 1370 xnb_rx_perform_pending_unmop(xnb_t *xnbp) 1371 { 1372 RING_IDX loop; 1373 #ifdef XNB_DEBUG 1374 gnttab_unmap_grant_ref_t *unmop; 1375 #endif /* XNB_DEBUG */ 1376 1377 ASSERT(MUTEX_HELD(&xnbp->xnb_rx_lock)); 1378 ASSERT(xnbp->xnb_rx_unmop_count > 0); 1379 1380 if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, 1381 xnbp->xnb_rx_unmop, xnbp->xnb_rx_unmop_count) < 0) { 1382 cmn_err(CE_WARN, "xnb_rx_perform_pending_unmop: " 1383 "unmap grant operation failed, " 1384 "%d pages lost", xnbp->xnb_rx_unmop_count); 1385 } 1386 1387 #ifdef XNB_DEBUG 1388 for (loop = 0, unmop = xnbp->xnb_rx_unmop; 1389 loop < xnbp->xnb_rx_unmop_count; 1390 loop++, unmop++) { 1391 if (unmop->status != 0) { 1392 cmn_err(CE_WARN, "xnb_rx_perform_pending_unmop: " 1393 "unmap grant reference failed (%d)", 1394 unmop->status); 1395 } 1396 } 1397 #endif /* XNB_DEBUG */ 1398 1399 for (loop = 0; loop < xnbp->xnb_rx_unmop_count; loop++) { 1400 xnb_rxbuf_t *rxp = xnbp->xnb_rx_unmop_rxp[loop]; 1401 1402 if (rxp == NULL) 1403 cmn_err(CE_PANIC, 1404 "xnb_rx_perform_pending_unmop: " 1405 "unexpected NULL rxp (loop %d; count %d)!", 1406 loop, xnbp->xnb_rx_unmop_count); 1407 1408 if (xnbp->xnb_connected) 1409 xnb_rx_mark_complete(xnbp, rxp->xr_id, rxp->xr_status); 1410 xnb_rxbuf_put(xnbp, rxp); 1411 } 1412 if (xnbp->xnb_connected) 1413 xnb_rx_notify_peer(xnbp); 1414 1415 xnbp->xnb_rx_unmop_count = 0; 1416 1417 #ifdef XNB_DEBUG 1418 bzero(xnbp->xnb_rx_unmop, sizeof (xnbp->xnb_rx_unmop)); 1419 bzero(xnbp->xnb_rx_unmop_rxp, sizeof (xnbp->xnb_rx_unmop_rxp)); 1420 #endif /* XNB_DEBUG */ 1421 } 1422 1423 static xnb_rxbuf_t * 1424 xnb_rxbuf_get(xnb_t *xnbp, int flags) 1425 { 1426 xnb_rxbuf_t *rxp; 1427 1428 ASSERT(MUTEX_HELD(&xnbp->xnb_rx_lock)); 1429 1430 rxp = kmem_cache_alloc(xnb_rxbuf_cachep, flags); 1431 if (rxp != NULL) { 1432 ASSERT((rxp->xr_flags & XNB_RXBUF_INUSE) == 0); 1433 rxp->xr_flags |= XNB_RXBUF_INUSE; 1434 1435 rxp->xr_xnbp = xnbp; 1436 rxp->xr_mop.dom = xnbp->xnb_peer; 1437 1438 rxp->xr_mop.flags = GNTMAP_host_map; 1439 if (!xnbp->xnb_rx_pages_writable) 1440 rxp->xr_mop.flags |= GNTMAP_readonly; 1441 1442 xnbp->xnb_rx_buf_count++; 1443 } 1444 1445 return (rxp); 1446 } 1447 1448 static void 1449 xnb_rxbuf_put(xnb_t *xnbp, xnb_rxbuf_t *rxp) 1450 { 1451 ASSERT(MUTEX_HELD(&xnbp->xnb_rx_lock)); 1452 ASSERT((rxp->xr_flags & XNB_RXBUF_INUSE) == XNB_RXBUF_INUSE); 1453 1454 rxp->xr_flags &= ~XNB_RXBUF_INUSE; 1455 xnbp->xnb_rx_buf_count--; 1456 1457 kmem_cache_free(xnb_rxbuf_cachep, rxp); 1458 } 1459 1460 static mblk_t * 1461 xnb_recv(xnb_t *xnbp) 1462 { 1463 RING_IDX start, end, loop; 1464 gnttab_map_grant_ref_t *mop; 1465 xnb_rxbuf_t **rxpp; 1466 netif_tx_request_t *txreq; 1467 boolean_t work_to_do; 1468 mblk_t *head, *tail; 1469 /* 1470 * If the peer granted a read-only mapping to the page then we 1471 * must copy the data, as the local protocol stack (should the 1472 * packet be destined for this host) will modify the packet 1473 * 'in place'. 1474 */ 1475 boolean_t copy = !xnbp->xnb_rx_pages_writable; 1476 1477 /* 1478 * For each individual request, the sequence of actions is: 1479 * 1480 * 1. get the request. 1481 * 2. map the page based on the grant ref. 1482 * 3. allocate an mblk, copy the data to it. 1483 * 4. release the grant. 1484 * 5. update the ring. 1485 * 6. pass the packet upward. 1486 * 7. kick the peer. 1487 * 1488 * In fact, we try to perform the grant operations in batches, 1489 * so there are two loops. 1490 */ 1491 1492 head = tail = NULL; 1493 around: 1494 ASSERT(MUTEX_HELD(&xnbp->xnb_rx_lock)); 1495 1496 /* LINTED: constant in conditional context */ 1497 RING_FINAL_CHECK_FOR_REQUESTS(&xnbp->xnb_tx_ring, work_to_do); 1498 if (!work_to_do) { 1499 finished: 1500 return (head); 1501 } 1502 1503 start = xnbp->xnb_tx_ring.req_cons; 1504 end = xnbp->xnb_tx_ring.sring->req_prod; 1505 1506 for (loop = start, mop = xnbp->xnb_rx_mop, rxpp = xnbp->xnb_rx_bufp; 1507 loop != end; 1508 loop++, mop++, rxpp++) { 1509 xnb_rxbuf_t *rxp; 1510 1511 rxp = xnb_rxbuf_get(xnbp, KM_NOSLEEP); 1512 if (rxp == NULL) 1513 break; 1514 1515 ASSERT(xnbp->xnb_rx_pages_writable || 1516 ((rxp->xr_mop.flags & GNTMAP_readonly) 1517 == GNTMAP_readonly)); 1518 1519 rxp->xr_mop.ref = 1520 RING_GET_REQUEST(&xnbp->xnb_tx_ring, loop)->gref; 1521 1522 *mop = rxp->xr_mop; 1523 *rxpp = rxp; 1524 } 1525 1526 if ((loop - start) == 0) 1527 goto finished; 1528 1529 end = loop; 1530 1531 if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, 1532 xnbp->xnb_rx_mop, end - start) != 0) { 1533 1534 cmn_err(CE_WARN, "xnb_recv: map grant operation failed"); 1535 1536 loop = start; 1537 rxpp = xnbp->xnb_rx_bufp; 1538 1539 while (loop != end) { 1540 xnb_rxbuf_put(xnbp, *rxpp); 1541 1542 loop++; 1543 rxpp++; 1544 } 1545 1546 goto finished; 1547 } 1548 1549 for (loop = start, mop = xnbp->xnb_rx_mop, rxpp = xnbp->xnb_rx_bufp; 1550 loop != end; 1551 loop++, mop++, rxpp++) { 1552 mblk_t *mp = NULL; 1553 int16_t status = NETIF_RSP_OKAY; 1554 xnb_rxbuf_t *rxp = *rxpp; 1555 1556 if (mop->status != 0) { 1557 cmn_err(CE_WARN, "xnb_recv: " 1558 "failed to map buffer: %d", 1559 mop->status); 1560 status = NETIF_RSP_ERROR; 1561 } 1562 1563 txreq = RING_GET_REQUEST(&xnbp->xnb_tx_ring, loop); 1564 1565 if (status == NETIF_RSP_OKAY) { 1566 if (copy) { 1567 mp = allocb(txreq->size, BPRI_MED); 1568 if (mp == NULL) { 1569 status = NETIF_RSP_ERROR; 1570 xnbp->xnb_stat_rx_allocb_failed++; 1571 } else { 1572 bcopy((caddr_t)(uintptr_t) 1573 mop->host_addr + txreq->offset, 1574 mp->b_wptr, txreq->size); 1575 mp->b_wptr += txreq->size; 1576 } 1577 } else { 1578 mp = desballoc((uchar_t *)(uintptr_t) 1579 mop->host_addr + txreq->offset, 1580 txreq->size, 0, &rxp->xr_free_rtn); 1581 if (mp == NULL) { 1582 status = NETIF_RSP_ERROR; 1583 xnbp->xnb_stat_rx_allocb_failed++; 1584 } else { 1585 rxp->xr_id = txreq->id; 1586 rxp->xr_status = status; 1587 rxp->xr_mop = *mop; 1588 1589 mp->b_wptr += txreq->size; 1590 } 1591 } 1592 1593 /* 1594 * If we have a buffer and there are checksum 1595 * flags, process them appropriately. 1596 */ 1597 if ((mp != NULL) && 1598 ((txreq->flags & 1599 (NETTXF_csum_blank | NETTXF_data_validated)) 1600 != 0)) { 1601 mp = xnbp->xnb_flavour->xf_cksum_from_peer(xnbp, 1602 mp, txreq->flags); 1603 xnbp->xnb_stat_rx_cksum_no_need++; 1604 } 1605 } 1606 1607 if (copy || (mp == NULL)) { 1608 rxp->xr_status = status; 1609 rxp->xr_id = txreq->id; 1610 xnb_rx_schedule_unmop(xnbp, mop, rxp); 1611 } 1612 1613 if (mp != NULL) { 1614 xnbp->xnb_stat_ipackets++; 1615 xnbp->xnb_stat_rbytes += txreq->size; 1616 1617 mp->b_next = NULL; 1618 if (head == NULL) { 1619 ASSERT(tail == NULL); 1620 head = mp; 1621 } else { 1622 ASSERT(tail != NULL); 1623 tail->b_next = mp; 1624 } 1625 tail = mp; 1626 } 1627 } 1628 1629 xnbp->xnb_tx_ring.req_cons = loop; 1630 1631 goto around; 1632 /* NOTREACHED */ 1633 } 1634 1635 /* 1636 * intr() -- ring interrupt service routine 1637 */ 1638 static uint_t 1639 xnb_intr(caddr_t arg) 1640 { 1641 xnb_t *xnbp = (xnb_t *)arg; 1642 mblk_t *mp; 1643 1644 xnbp->xnb_stat_intr++; 1645 1646 mutex_enter(&xnbp->xnb_rx_lock); 1647 1648 ASSERT(xnbp->xnb_connected); 1649 1650 mp = xnb_recv(xnbp); 1651 1652 mutex_exit(&xnbp->xnb_rx_lock); 1653 1654 if (!xnbp->xnb_hotplugged) { 1655 xnbp->xnb_stat_rx_too_early++; 1656 goto fail; 1657 } 1658 if (mp == NULL) { 1659 xnbp->xnb_stat_spurious_intr++; 1660 goto fail; 1661 } 1662 1663 xnbp->xnb_flavour->xf_recv(xnbp, mp); 1664 1665 return (DDI_INTR_CLAIMED); 1666 1667 fail: 1668 freemsgchain(mp); 1669 return (DDI_INTR_CLAIMED); 1670 } 1671 1672 static boolean_t 1673 xnb_connect_rings(dev_info_t *dip) 1674 { 1675 xnb_t *xnbp = ddi_get_driver_private(dip); 1676 char *oename; 1677 struct gnttab_map_grant_ref map_op; 1678 evtchn_port_t evtchn; 1679 int i; 1680 1681 /* 1682 * Cannot attempt to connect the rings if already connected. 1683 */ 1684 ASSERT(!xnbp->xnb_connected); 1685 1686 oename = xvdi_get_oename(dip); 1687 1688 if (xenbus_gather(XBT_NULL, oename, 1689 "event-channel", "%u", &evtchn, 1690 "tx-ring-ref", "%lu", &xnbp->xnb_tx_ring_ref, 1691 "rx-ring-ref", "%lu", &xnbp->xnb_rx_ring_ref, 1692 NULL) != 0) { 1693 cmn_err(CE_WARN, "xnb_connect_rings: " 1694 "cannot read other-end details from %s", 1695 oename); 1696 goto fail; 1697 } 1698 1699 if (xenbus_scanf(XBT_NULL, oename, 1700 "feature-tx-writable", "%d", &i) != 0) 1701 i = 0; 1702 if (i != 0) 1703 xnbp->xnb_rx_pages_writable = B_TRUE; 1704 1705 if (xenbus_scanf(XBT_NULL, oename, 1706 "feature-no-csum-offload", "%d", &i) != 0) 1707 i = 0; 1708 if ((i == 1) || !xnbp->xnb_cksum_offload) 1709 xnbp->xnb_cksum_offload = B_FALSE; 1710 1711 /* Check whether our peer knows and requests hypervisor copy */ 1712 if (xenbus_scanf(XBT_NULL, oename, "request-rx-copy", "%d", &i) 1713 != 0) 1714 i = 0; 1715 if (i != 0) 1716 xnbp->xnb_hv_copy = B_TRUE; 1717 1718 /* 1719 * 1. allocate a vaddr for the tx page, one for the rx page. 1720 * 2. call GNTTABOP_map_grant_ref to map the relevant pages 1721 * into the allocated vaddr (one for tx, one for rx). 1722 * 3. call EVTCHNOP_bind_interdomain to have the event channel 1723 * bound to this domain. 1724 * 4. associate the event channel with an interrupt. 1725 * 5. declare ourselves connected. 1726 * 6. enable the interrupt. 1727 */ 1728 1729 /* 1.tx */ 1730 xnbp->xnb_tx_ring_addr = vmem_xalloc(heap_arena, PAGESIZE, PAGESIZE, 1731 0, 0, 0, 0, VM_SLEEP); 1732 ASSERT(xnbp->xnb_tx_ring_addr != NULL); 1733 1734 /* 2.tx */ 1735 map_op.host_addr = (uint64_t)((long)xnbp->xnb_tx_ring_addr); 1736 map_op.flags = GNTMAP_host_map; 1737 map_op.ref = xnbp->xnb_tx_ring_ref; 1738 map_op.dom = xnbp->xnb_peer; 1739 hat_prepare_mapping(kas.a_hat, xnbp->xnb_tx_ring_addr); 1740 if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, 1741 &map_op, 1) != 0 || map_op.status != 0) { 1742 cmn_err(CE_WARN, "xnb_connect_rings: cannot map tx-ring page."); 1743 goto fail; 1744 } 1745 xnbp->xnb_tx_ring_handle = map_op.handle; 1746 1747 /* LINTED: constant in conditional context */ 1748 BACK_RING_INIT(&xnbp->xnb_tx_ring, 1749 (netif_tx_sring_t *)xnbp->xnb_tx_ring_addr, PAGESIZE); 1750 1751 /* 1.rx */ 1752 xnbp->xnb_rx_ring_addr = vmem_xalloc(heap_arena, PAGESIZE, PAGESIZE, 1753 0, 0, 0, 0, VM_SLEEP); 1754 ASSERT(xnbp->xnb_rx_ring_addr != NULL); 1755 1756 /* 2.rx */ 1757 map_op.host_addr = (uint64_t)((long)xnbp->xnb_rx_ring_addr); 1758 map_op.flags = GNTMAP_host_map; 1759 map_op.ref = xnbp->xnb_rx_ring_ref; 1760 map_op.dom = xnbp->xnb_peer; 1761 hat_prepare_mapping(kas.a_hat, xnbp->xnb_rx_ring_addr); 1762 if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, 1763 &map_op, 1) != 0 || map_op.status != 0) { 1764 cmn_err(CE_WARN, "xnb_connect_rings: cannot map rx-ring page."); 1765 goto fail; 1766 } 1767 xnbp->xnb_rx_ring_handle = map_op.handle; 1768 1769 /* LINTED: constant in conditional context */ 1770 BACK_RING_INIT(&xnbp->xnb_rx_ring, 1771 (netif_rx_sring_t *)xnbp->xnb_rx_ring_addr, PAGESIZE); 1772 1773 /* 3 */ 1774 if (xvdi_bind_evtchn(dip, evtchn) != DDI_SUCCESS) { 1775 cmn_err(CE_WARN, "xnb_connect_rings: " 1776 "cannot bind event channel %d", xnbp->xnb_evtchn); 1777 xnbp->xnb_evtchn = INVALID_EVTCHN; 1778 goto fail; 1779 } 1780 xnbp->xnb_evtchn = xvdi_get_evtchn(dip); 1781 1782 /* 1783 * It would be good to set the state to XenbusStateConnected 1784 * here as well, but then what if ddi_add_intr() failed? 1785 * Changing the state in the store will be noticed by the peer 1786 * and cannot be "taken back". 1787 */ 1788 mutex_enter(&xnbp->xnb_tx_lock); 1789 mutex_enter(&xnbp->xnb_rx_lock); 1790 1791 /* 5.1 */ 1792 xnbp->xnb_connected = B_TRUE; 1793 1794 mutex_exit(&xnbp->xnb_rx_lock); 1795 mutex_exit(&xnbp->xnb_tx_lock); 1796 1797 /* 4, 6 */ 1798 if (ddi_add_intr(dip, 0, NULL, NULL, xnb_intr, (caddr_t)xnbp) 1799 != DDI_SUCCESS) { 1800 cmn_err(CE_WARN, "xnb_connect_rings: cannot add interrupt"); 1801 goto fail; 1802 } 1803 xnbp->xnb_irq = B_TRUE; 1804 1805 /* 5.2 */ 1806 (void) xvdi_switch_state(dip, XBT_NULL, XenbusStateConnected); 1807 1808 return (B_TRUE); 1809 1810 fail: 1811 mutex_enter(&xnbp->xnb_tx_lock); 1812 mutex_enter(&xnbp->xnb_rx_lock); 1813 1814 xnbp->xnb_connected = B_FALSE; 1815 mutex_exit(&xnbp->xnb_rx_lock); 1816 mutex_exit(&xnbp->xnb_tx_lock); 1817 1818 return (B_FALSE); 1819 } 1820 1821 static void 1822 xnb_disconnect_rings(dev_info_t *dip) 1823 { 1824 xnb_t *xnbp = ddi_get_driver_private(dip); 1825 1826 if (xnbp->xnb_irq) { 1827 ddi_remove_intr(dip, 0, NULL); 1828 xnbp->xnb_irq = B_FALSE; 1829 } 1830 1831 if (xnbp->xnb_rx_unmop_count > 0) 1832 xnb_rx_perform_pending_unmop(xnbp); 1833 1834 if (xnbp->xnb_evtchn != INVALID_EVTCHN) { 1835 xvdi_free_evtchn(dip); 1836 xnbp->xnb_evtchn = INVALID_EVTCHN; 1837 } 1838 1839 if (xnbp->xnb_rx_ring_handle != INVALID_GRANT_HANDLE) { 1840 struct gnttab_unmap_grant_ref unmap_op; 1841 1842 unmap_op.host_addr = (uint64_t)(uintptr_t) 1843 xnbp->xnb_rx_ring_addr; 1844 unmap_op.dev_bus_addr = 0; 1845 unmap_op.handle = xnbp->xnb_rx_ring_handle; 1846 if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, 1847 &unmap_op, 1) != 0) 1848 cmn_err(CE_WARN, "xnb_disconnect_rings: " 1849 "cannot unmap rx-ring page (%d)", 1850 unmap_op.status); 1851 1852 xnbp->xnb_rx_ring_handle = INVALID_GRANT_HANDLE; 1853 } 1854 1855 if (xnbp->xnb_rx_ring_addr != NULL) { 1856 hat_release_mapping(kas.a_hat, xnbp->xnb_rx_ring_addr); 1857 vmem_free(heap_arena, xnbp->xnb_rx_ring_addr, PAGESIZE); 1858 xnbp->xnb_rx_ring_addr = NULL; 1859 } 1860 1861 if (xnbp->xnb_tx_ring_handle != INVALID_GRANT_HANDLE) { 1862 struct gnttab_unmap_grant_ref unmap_op; 1863 1864 unmap_op.host_addr = (uint64_t)(uintptr_t) 1865 xnbp->xnb_tx_ring_addr; 1866 unmap_op.dev_bus_addr = 0; 1867 unmap_op.handle = xnbp->xnb_tx_ring_handle; 1868 if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, 1869 &unmap_op, 1) != 0) 1870 cmn_err(CE_WARN, "xnb_disconnect_rings: " 1871 "cannot unmap tx-ring page (%d)", 1872 unmap_op.status); 1873 1874 xnbp->xnb_tx_ring_handle = INVALID_GRANT_HANDLE; 1875 } 1876 1877 if (xnbp->xnb_tx_ring_addr != NULL) { 1878 hat_release_mapping(kas.a_hat, xnbp->xnb_tx_ring_addr); 1879 vmem_free(heap_arena, xnbp->xnb_tx_ring_addr, PAGESIZE); 1880 xnbp->xnb_tx_ring_addr = NULL; 1881 } 1882 } 1883 1884 /*ARGSUSED*/ 1885 static void 1886 xnb_oe_state_change(dev_info_t *dip, ddi_eventcookie_t id, 1887 void *arg, void *impl_data) 1888 { 1889 xnb_t *xnbp = ddi_get_driver_private(dip); 1890 XenbusState new_state = *(XenbusState *)impl_data; 1891 1892 ASSERT(xnbp != NULL); 1893 1894 switch (new_state) { 1895 case XenbusStateConnected: 1896 /* spurious state change */ 1897 if (xnbp->xnb_connected) 1898 return; 1899 1900 if (xnb_connect_rings(dip)) { 1901 xnbp->xnb_flavour->xf_peer_connected(xnbp); 1902 } else { 1903 xnbp->xnb_flavour->xf_peer_disconnected(xnbp); 1904 xnb_disconnect_rings(dip); 1905 (void) xvdi_switch_state(dip, XBT_NULL, 1906 XenbusStateClosed); 1907 (void) xvdi_post_event(dip, XEN_HP_REMOVE); 1908 } 1909 1910 /* 1911 * Now that we've attempted to connect it's reasonable 1912 * to allow an attempt to detach. 1913 */ 1914 xnbp->xnb_detachable = B_TRUE; 1915 1916 break; 1917 1918 case XenbusStateClosing: 1919 (void) xvdi_switch_state(dip, XBT_NULL, XenbusStateClosing); 1920 1921 break; 1922 1923 case XenbusStateClosed: 1924 xnbp->xnb_flavour->xf_peer_disconnected(xnbp); 1925 1926 mutex_enter(&xnbp->xnb_tx_lock); 1927 mutex_enter(&xnbp->xnb_rx_lock); 1928 1929 xnb_disconnect_rings(dip); 1930 xnbp->xnb_connected = B_FALSE; 1931 1932 mutex_exit(&xnbp->xnb_rx_lock); 1933 mutex_exit(&xnbp->xnb_tx_lock); 1934 1935 (void) xvdi_switch_state(dip, XBT_NULL, XenbusStateClosed); 1936 (void) xvdi_post_event(dip, XEN_HP_REMOVE); 1937 /* 1938 * In all likelyhood this is already set (in the above 1939 * case), but if the peer never attempted to connect 1940 * and the domain is destroyed we get here without 1941 * having been through the case above, so we set it to 1942 * be sure. 1943 */ 1944 xnbp->xnb_detachable = B_TRUE; 1945 1946 break; 1947 1948 default: 1949 break; 1950 } 1951 } 1952 1953 /*ARGSUSED*/ 1954 static void 1955 xnb_hp_state_change(dev_info_t *dip, ddi_eventcookie_t id, 1956 void *arg, void *impl_data) 1957 { 1958 xnb_t *xnbp = ddi_get_driver_private(dip); 1959 xendev_hotplug_state_t state = *(xendev_hotplug_state_t *)impl_data; 1960 boolean_t success; 1961 1962 ASSERT(xnbp != NULL); 1963 1964 switch (state) { 1965 case Connected: 1966 1967 /* spurious hotplug event */ 1968 if (xnbp->xnb_hotplugged) 1969 return; 1970 1971 success = xnbp->xnb_flavour->xf_hotplug_connected(xnbp); 1972 1973 mutex_enter(&xnbp->xnb_tx_lock); 1974 mutex_enter(&xnbp->xnb_rx_lock); 1975 1976 xnbp->xnb_hotplugged = success; 1977 1978 mutex_exit(&xnbp->xnb_rx_lock); 1979 mutex_exit(&xnbp->xnb_tx_lock); 1980 break; 1981 1982 default: 1983 break; 1984 } 1985 } 1986 1987 static struct modldrv modldrv = { 1988 &mod_miscops, "xnb", 1989 }; 1990 1991 static struct modlinkage modlinkage = { 1992 MODREV_1, &modldrv, NULL 1993 }; 1994 1995 int 1996 _init(void) 1997 { 1998 int i; 1999 2000 mutex_init(&xnb_alloc_page_lock, NULL, MUTEX_DRIVER, NULL); 2001 2002 xnb_rxbuf_cachep = kmem_cache_create("xnb_rxbuf_cachep", 2003 sizeof (xnb_rxbuf_t), 0, xnb_rxbuf_constructor, 2004 xnb_rxbuf_destructor, NULL, NULL, NULL, 0); 2005 ASSERT(xnb_rxbuf_cachep != NULL); 2006 2007 i = mod_install(&modlinkage); 2008 if (i != DDI_SUCCESS) { 2009 kmem_cache_destroy(xnb_rxbuf_cachep); 2010 mutex_destroy(&xnb_alloc_page_lock); 2011 } 2012 return (i); 2013 } 2014 2015 int 2016 _info(struct modinfo *modinfop) 2017 { 2018 return (mod_info(&modlinkage, modinfop)); 2019 } 2020 2021 int 2022 _fini(void) 2023 { 2024 int i; 2025 2026 i = mod_remove(&modlinkage); 2027 if (i == DDI_SUCCESS) { 2028 kmem_cache_destroy(xnb_rxbuf_cachep); 2029 mutex_destroy(&xnb_alloc_page_lock); 2030 } 2031 return (i); 2032 } 2033