1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #ifdef DEBUG 28 #define XNB_DEBUG 1 29 #endif /* DEBUG */ 30 31 #include "xnb.h" 32 33 #include <sys/sunddi.h> 34 #include <sys/sunndi.h> 35 #include <sys/modctl.h> 36 #include <sys/conf.h> 37 #include <sys/mac.h> 38 #include <sys/dlpi.h> 39 #include <sys/strsubr.h> 40 #include <sys/strsun.h> 41 #include <sys/types.h> 42 #include <sys/pattr.h> 43 #include <vm/seg_kmem.h> 44 #include <vm/hat_i86.h> 45 #include <xen/sys/xenbus_impl.h> 46 #include <xen/sys/xendev.h> 47 #include <sys/balloon_impl.h> 48 #include <sys/evtchn_impl.h> 49 #include <sys/gnttab.h> 50 #include <vm/vm_dep.h> 51 52 #include <sys/gld.h> 53 #include <inet/ip.h> 54 #include <inet/ip_impl.h> 55 #include <sys/vnic_impl.h> /* blech. */ 56 57 /* 58 * The terms "transmit" and "receive" are used in alignment with domU, 59 * which means that packets originating from the peer domU are "transmitted" 60 * to other parts of the system and packets are "received" from them. 61 */ 62 63 /* 64 * XXPV dme: things to do, as well as various things indicated 65 * throughout the source: 66 * - copy avoidance outbound. 67 * - copy avoidance inbound. 68 * - transfer credit limiting. 69 * - MAC address based filtering. 70 */ 71 72 /* 73 * Linux expects to have some headroom in received buffers. The Linux 74 * frontend driver (netfront) checks to see if the headroom is 75 * available and will re-allocate the buffer to make room if 76 * necessary. To avoid this we add RX_BUFFER_HEADROOM bytes of 77 * headroom to each packet we pass to the peer. 78 */ 79 #define RX_BUFFER_HEADROOM 16 80 81 /* 82 * Should we attempt to defer checksum calculation? 83 */ 84 static boolean_t xnb_cksum_offload = B_TRUE; 85 /* 86 * When receiving packets from a guest, should they be copied 87 * or used as-is (esballoc)? 88 */ 89 static boolean_t xnb_tx_always_copy = B_TRUE; 90 91 static boolean_t xnb_connect_rings(dev_info_t *); 92 static void xnb_disconnect_rings(dev_info_t *); 93 static void xnb_oe_state_change(dev_info_t *, ddi_eventcookie_t, 94 void *, void *); 95 static void xnb_hp_state_change(dev_info_t *, ddi_eventcookie_t, 96 void *, void *); 97 98 static int xnb_txbuf_constructor(void *, void *, int); 99 static void xnb_txbuf_destructor(void *, void *); 100 static xnb_txbuf_t *xnb_txbuf_get(xnb_t *, int); 101 static void xnb_txbuf_put(xnb_t *, xnb_txbuf_t *); 102 static void xnb_tx_notify_peer(xnb_t *); 103 static void xnb_tx_complete(xnb_txbuf_t *); 104 static void xnb_tx_mark_complete(xnb_t *, RING_IDX, int16_t); 105 static void xnb_tx_schedule_unmop(xnb_t *, gnttab_map_grant_ref_t *, 106 xnb_txbuf_t *); 107 static void xnb_tx_perform_pending_unmop(xnb_t *); 108 mblk_t *xnb_copy_to_peer(xnb_t *, mblk_t *); 109 110 int xnb_unmop_lowwat = NET_TX_RING_SIZE >> 2; 111 int xnb_unmop_hiwat = NET_TX_RING_SIZE - (NET_TX_RING_SIZE >> 2); 112 113 114 boolean_t xnb_hv_copy = B_TRUE; 115 boolean_t xnb_explicit_pageflip_set = B_FALSE; 116 117 /* XXPV dme: are these really invalid? */ 118 #define INVALID_GRANT_HANDLE ((grant_handle_t)-1) 119 #define INVALID_GRANT_REF ((grant_ref_t)-1) 120 121 static kmem_cache_t *xnb_txbuf_cachep; 122 static kmutex_t xnb_alloc_page_lock; 123 124 /* 125 * Statistics. 126 */ 127 static char *aux_statistics[] = { 128 "rx_cksum_deferred", 129 "tx_cksum_no_need", 130 "rx_rsp_notok", 131 "tx_notify_deferred", 132 "tx_notify_sent", 133 "rx_notify_deferred", 134 "rx_notify_sent", 135 "tx_too_early", 136 "rx_too_early", 137 "rx_allocb_failed", 138 "tx_allocb_failed", 139 "rx_foreign_page", 140 "mac_full", 141 "spurious_intr", 142 "allocation_success", 143 "allocation_failure", 144 "small_allocation_success", 145 "small_allocation_failure", 146 "other_allocation_failure", 147 "rx_pageboundary_crossed", 148 "rx_cpoparea_grown", 149 "csum_hardware", 150 "csum_software", 151 }; 152 153 static int 154 xnb_ks_aux_update(kstat_t *ksp, int flag) 155 { 156 xnb_t *xnbp; 157 kstat_named_t *knp; 158 159 if (flag != KSTAT_READ) 160 return (EACCES); 161 162 xnbp = ksp->ks_private; 163 knp = ksp->ks_data; 164 165 /* 166 * Assignment order should match that of the names in 167 * aux_statistics. 168 */ 169 (knp++)->value.ui64 = xnbp->xnb_stat_rx_cksum_deferred; 170 (knp++)->value.ui64 = xnbp->xnb_stat_tx_cksum_no_need; 171 (knp++)->value.ui64 = xnbp->xnb_stat_rx_rsp_notok; 172 (knp++)->value.ui64 = xnbp->xnb_stat_tx_notify_deferred; 173 (knp++)->value.ui64 = xnbp->xnb_stat_tx_notify_sent; 174 (knp++)->value.ui64 = xnbp->xnb_stat_rx_notify_deferred; 175 (knp++)->value.ui64 = xnbp->xnb_stat_rx_notify_sent; 176 (knp++)->value.ui64 = xnbp->xnb_stat_tx_too_early; 177 (knp++)->value.ui64 = xnbp->xnb_stat_rx_too_early; 178 (knp++)->value.ui64 = xnbp->xnb_stat_rx_allocb_failed; 179 (knp++)->value.ui64 = xnbp->xnb_stat_tx_allocb_failed; 180 (knp++)->value.ui64 = xnbp->xnb_stat_rx_foreign_page; 181 (knp++)->value.ui64 = xnbp->xnb_stat_mac_full; 182 (knp++)->value.ui64 = xnbp->xnb_stat_spurious_intr; 183 (knp++)->value.ui64 = xnbp->xnb_stat_allocation_success; 184 (knp++)->value.ui64 = xnbp->xnb_stat_allocation_failure; 185 (knp++)->value.ui64 = xnbp->xnb_stat_small_allocation_success; 186 (knp++)->value.ui64 = xnbp->xnb_stat_small_allocation_failure; 187 (knp++)->value.ui64 = xnbp->xnb_stat_other_allocation_failure; 188 (knp++)->value.ui64 = xnbp->xnb_stat_rx_pagebndry_crossed; 189 (knp++)->value.ui64 = xnbp->xnb_stat_rx_cpoparea_grown; 190 (knp++)->value.ui64 = xnbp->xnb_stat_csum_hardware; 191 (knp++)->value.ui64 = xnbp->xnb_stat_csum_software; 192 193 return (0); 194 } 195 196 static boolean_t 197 xnb_ks_init(xnb_t *xnbp) 198 { 199 int nstat = sizeof (aux_statistics) / 200 sizeof (aux_statistics[0]); 201 char **cp = aux_statistics; 202 kstat_named_t *knp; 203 204 /* 205 * Create and initialise kstats. 206 */ 207 xnbp->xnb_kstat_aux = kstat_create(ddi_driver_name(xnbp->xnb_devinfo), 208 ddi_get_instance(xnbp->xnb_devinfo), "aux_statistics", "net", 209 KSTAT_TYPE_NAMED, nstat, 0); 210 if (xnbp->xnb_kstat_aux == NULL) 211 return (B_FALSE); 212 213 xnbp->xnb_kstat_aux->ks_private = xnbp; 214 xnbp->xnb_kstat_aux->ks_update = xnb_ks_aux_update; 215 216 knp = xnbp->xnb_kstat_aux->ks_data; 217 while (nstat > 0) { 218 kstat_named_init(knp, *cp, KSTAT_DATA_UINT64); 219 220 knp++; 221 cp++; 222 nstat--; 223 } 224 225 kstat_install(xnbp->xnb_kstat_aux); 226 227 return (B_TRUE); 228 } 229 230 static void 231 xnb_ks_free(xnb_t *xnbp) 232 { 233 kstat_delete(xnbp->xnb_kstat_aux); 234 } 235 236 /* 237 * Software checksum calculation and insertion for an arbitrary packet. 238 */ 239 /*ARGSUSED*/ 240 static mblk_t * 241 xnb_software_csum(xnb_t *xnbp, mblk_t *mp) 242 { 243 /* 244 * XXPV dme: shouldn't rely on vnic_fix_cksum(), not least 245 * because it doesn't cover all of the interesting cases :-( 246 */ 247 (void) hcksum_assoc(mp, NULL, NULL, 0, 0, 0, 0, 248 HCK_FULLCKSUM, KM_NOSLEEP); 249 250 return (vnic_fix_cksum(mp)); 251 } 252 253 mblk_t * 254 xnb_process_cksum_flags(xnb_t *xnbp, mblk_t *mp, uint32_t capab) 255 { 256 struct ether_header *ehp; 257 uint16_t sap; 258 uint32_t offset; 259 ipha_t *ipha; 260 261 ASSERT(mp->b_next == NULL); 262 263 /* 264 * Check that the packet is contained in a single mblk. In 265 * the "from peer" path this is true today, but will change 266 * when scatter gather support is added. In the "to peer" 267 * path we cannot be sure, but in most cases it will be true 268 * (in the xnbo case the packet has come from a MAC device 269 * which is unlikely to split packets). 270 */ 271 if (mp->b_cont != NULL) 272 goto software; 273 274 /* 275 * If the MAC has no hardware capability don't do any further 276 * checking. 277 */ 278 if (capab == 0) 279 goto software; 280 281 ASSERT(MBLKL(mp) >= sizeof (struct ether_header)); 282 ehp = (struct ether_header *)mp->b_rptr; 283 284 if (ntohs(ehp->ether_type) == VLAN_TPID) { 285 struct ether_vlan_header *evhp; 286 287 ASSERT(MBLKL(mp) >= sizeof (struct ether_vlan_header)); 288 evhp = (struct ether_vlan_header *)mp->b_rptr; 289 sap = ntohs(evhp->ether_type); 290 offset = sizeof (struct ether_vlan_header); 291 } else { 292 sap = ntohs(ehp->ether_type); 293 offset = sizeof (struct ether_header); 294 } 295 296 /* 297 * We only attempt to do IPv4 packets in hardware. 298 */ 299 if (sap != ETHERTYPE_IP) 300 goto software; 301 302 /* 303 * We know that this is an IPv4 packet. 304 */ 305 ipha = (ipha_t *)(mp->b_rptr + offset); 306 307 switch (ipha->ipha_protocol) { 308 case IPPROTO_TCP: 309 case IPPROTO_UDP: { 310 uint32_t start, length, stuff, cksum; 311 uint16_t *stuffp; 312 313 /* 314 * This is a TCP/IPv4 or UDP/IPv4 packet, for which we 315 * can use full IPv4 and partial checksum offload. 316 */ 317 if ((capab & (HCKSUM_INET_FULL_V4|HCKSUM_INET_PARTIAL)) == 0) 318 break; 319 320 start = IP_SIMPLE_HDR_LENGTH; 321 length = ntohs(ipha->ipha_length); 322 if (ipha->ipha_protocol == IPPROTO_TCP) { 323 stuff = start + TCP_CHECKSUM_OFFSET; 324 cksum = IP_TCP_CSUM_COMP; 325 } else { 326 stuff = start + UDP_CHECKSUM_OFFSET; 327 cksum = IP_UDP_CSUM_COMP; 328 } 329 stuffp = (uint16_t *)(mp->b_rptr + offset + stuff); 330 331 if (capab & HCKSUM_INET_FULL_V4) { 332 /* 333 * Some devices require that the checksum 334 * field of the packet is zero for full 335 * offload. 336 */ 337 *stuffp = 0; 338 339 (void) hcksum_assoc(mp, NULL, NULL, 340 0, 0, 0, 0, 341 HCK_FULLCKSUM, KM_NOSLEEP); 342 343 xnbp->xnb_stat_csum_hardware++; 344 345 return (mp); 346 } 347 348 if (capab & HCKSUM_INET_PARTIAL) { 349 if (*stuffp == 0) { 350 ipaddr_t src, dst; 351 352 /* 353 * Older Solaris guests don't insert 354 * the pseudo-header checksum, so we 355 * calculate it here. 356 */ 357 src = ipha->ipha_src; 358 dst = ipha->ipha_dst; 359 360 cksum += (dst >> 16) + (dst & 0xFFFF); 361 cksum += (src >> 16) + (src & 0xFFFF); 362 cksum += length - IP_SIMPLE_HDR_LENGTH; 363 364 cksum = (cksum >> 16) + (cksum & 0xFFFF); 365 cksum = (cksum >> 16) + (cksum & 0xFFFF); 366 367 ASSERT(cksum <= 0xFFFF); 368 369 *stuffp = (uint16_t)(cksum ? cksum : ~cksum); 370 } 371 372 (void) hcksum_assoc(mp, NULL, NULL, 373 start, stuff, length, 0, 374 HCK_PARTIALCKSUM, KM_NOSLEEP); 375 376 xnbp->xnb_stat_csum_hardware++; 377 378 return (mp); 379 } 380 381 /* NOTREACHED */ 382 break; 383 } 384 385 default: 386 /* Use software. */ 387 break; 388 } 389 390 software: 391 /* 392 * We are not able to use any offload so do the whole thing in 393 * software. 394 */ 395 xnbp->xnb_stat_csum_software++; 396 397 return (xnb_software_csum(xnbp, mp)); 398 } 399 400 int 401 xnb_attach(dev_info_t *dip, xnb_flavour_t *flavour, void *flavour_data) 402 { 403 xnb_t *xnbp; 404 char *xsname, mac[ETHERADDRL * 3]; 405 406 xnbp = kmem_zalloc(sizeof (*xnbp), KM_SLEEP); 407 408 xnbp->xnb_flavour = flavour; 409 xnbp->xnb_flavour_data = flavour_data; 410 xnbp->xnb_devinfo = dip; 411 xnbp->xnb_evtchn = INVALID_EVTCHN; 412 xnbp->xnb_irq = B_FALSE; 413 xnbp->xnb_tx_ring_handle = INVALID_GRANT_HANDLE; 414 xnbp->xnb_rx_ring_handle = INVALID_GRANT_HANDLE; 415 xnbp->xnb_cksum_offload = xnb_cksum_offload; 416 xnbp->xnb_connected = B_FALSE; 417 xnbp->xnb_hotplugged = B_FALSE; 418 xnbp->xnb_detachable = B_FALSE; 419 xnbp->xnb_peer = xvdi_get_oeid(dip); 420 xnbp->xnb_tx_pages_writable = B_FALSE; 421 xnbp->xnb_tx_always_copy = xnb_tx_always_copy; 422 423 xnbp->xnb_tx_buf_count = 0; 424 xnbp->xnb_tx_unmop_count = 0; 425 426 xnbp->xnb_hv_copy = B_FALSE; 427 428 xnbp->xnb_rx_va = vmem_alloc(heap_arena, PAGESIZE, VM_SLEEP); 429 ASSERT(xnbp->xnb_rx_va != NULL); 430 431 if (ddi_get_iblock_cookie(dip, 0, &xnbp->xnb_icookie) 432 != DDI_SUCCESS) 433 goto failure; 434 435 /* allocated on demand, when/if we enter xnb_copy_to_peer() */ 436 xnbp->xnb_rx_cpop = NULL; 437 xnbp->xnb_cpop_sz = 0; 438 439 mutex_init(&xnbp->xnb_tx_lock, NULL, MUTEX_DRIVER, 440 xnbp->xnb_icookie); 441 mutex_init(&xnbp->xnb_rx_lock, NULL, MUTEX_DRIVER, 442 xnbp->xnb_icookie); 443 444 /* set driver private pointer now */ 445 ddi_set_driver_private(dip, xnbp); 446 447 if (!xnb_ks_init(xnbp)) 448 goto failure_1; 449 450 /* 451 * Receive notification of changes in the state of the 452 * driver in the guest domain. 453 */ 454 if (xvdi_add_event_handler(dip, XS_OE_STATE, xnb_oe_state_change, 455 NULL) != DDI_SUCCESS) 456 goto failure_2; 457 458 /* 459 * Receive notification of hotplug events. 460 */ 461 if (xvdi_add_event_handler(dip, XS_HP_STATE, xnb_hp_state_change, 462 NULL) != DDI_SUCCESS) 463 goto failure_2; 464 465 xsname = xvdi_get_xsname(dip); 466 467 if (xenbus_printf(XBT_NULL, xsname, 468 "feature-no-csum-offload", "%d", 469 xnbp->xnb_cksum_offload ? 0 : 1) != 0) 470 goto failure_3; 471 472 /* 473 * Use global xnb_hv_copy to export this feature. This means that 474 * we have to decide what to do before starting up a guest domain 475 */ 476 if (xenbus_printf(XBT_NULL, xsname, 477 "feature-rx-copy", "%d", xnb_hv_copy ? 1 : 0) != 0) 478 goto failure_3; 479 /* 480 * Linux domUs seem to depend on "feature-rx-flip" being 0 481 * in addition to "feature-rx-copy" being 1. It seems strange 482 * to use four possible states to describe a binary decision, 483 * but we might as well play nice. 484 */ 485 if (xenbus_printf(XBT_NULL, xsname, 486 "feature-rx-flip", "%d", xnb_explicit_pageflip_set ? 1 : 0) != 0) 487 goto failure_3; 488 489 if (xenbus_scanf(XBT_NULL, xsname, 490 "mac", "%s", mac) != 0) { 491 cmn_err(CE_WARN, "xnb_attach: " 492 "cannot read mac address from %s", 493 xsname); 494 goto failure_3; 495 } 496 497 if (ether_aton(mac, xnbp->xnb_mac_addr) != ETHERADDRL) { 498 cmn_err(CE_WARN, 499 "xnb_attach: cannot parse mac address %s", 500 mac); 501 goto failure_3; 502 } 503 504 (void) xvdi_switch_state(dip, XBT_NULL, XenbusStateInitWait); 505 (void) xvdi_post_event(dip, XEN_HP_ADD); 506 507 return (DDI_SUCCESS); 508 509 failure_3: 510 xvdi_remove_event_handler(dip, NULL); 511 512 failure_2: 513 xnb_ks_free(xnbp); 514 515 failure_1: 516 mutex_destroy(&xnbp->xnb_rx_lock); 517 mutex_destroy(&xnbp->xnb_tx_lock); 518 519 failure: 520 vmem_free(heap_arena, xnbp->xnb_rx_va, PAGESIZE); 521 kmem_free(xnbp, sizeof (*xnbp)); 522 return (DDI_FAILURE); 523 } 524 525 /*ARGSUSED*/ 526 void 527 xnb_detach(dev_info_t *dip) 528 { 529 xnb_t *xnbp = ddi_get_driver_private(dip); 530 531 ASSERT(xnbp != NULL); 532 ASSERT(!xnbp->xnb_connected); 533 ASSERT(xnbp->xnb_tx_buf_count == 0); 534 535 xnb_disconnect_rings(dip); 536 537 xvdi_remove_event_handler(dip, NULL); 538 539 xnb_ks_free(xnbp); 540 541 ddi_set_driver_private(dip, NULL); 542 543 mutex_destroy(&xnbp->xnb_tx_lock); 544 mutex_destroy(&xnbp->xnb_rx_lock); 545 546 if (xnbp->xnb_cpop_sz > 0) 547 kmem_free(xnbp->xnb_rx_cpop, sizeof (*xnbp->xnb_rx_cpop) 548 * xnbp->xnb_cpop_sz); 549 550 ASSERT(xnbp->xnb_rx_va != NULL); 551 vmem_free(heap_arena, xnbp->xnb_rx_va, PAGESIZE); 552 553 kmem_free(xnbp, sizeof (*xnbp)); 554 } 555 556 557 static mfn_t 558 xnb_alloc_page(xnb_t *xnbp) 559 { 560 #define WARNING_RATE_LIMIT 100 561 #define BATCH_SIZE 256 562 static mfn_t mfns[BATCH_SIZE]; /* common across all instances */ 563 static int nth = BATCH_SIZE; 564 mfn_t mfn; 565 566 mutex_enter(&xnb_alloc_page_lock); 567 if (nth == BATCH_SIZE) { 568 if (balloon_alloc_pages(BATCH_SIZE, mfns) != BATCH_SIZE) { 569 xnbp->xnb_stat_allocation_failure++; 570 mutex_exit(&xnb_alloc_page_lock); 571 572 /* 573 * Try for a single page in low memory situations. 574 */ 575 if (balloon_alloc_pages(1, &mfn) != 1) { 576 if ((xnbp->xnb_stat_small_allocation_failure++ 577 % WARNING_RATE_LIMIT) == 0) 578 cmn_err(CE_WARN, "xnb_alloc_page: " 579 "Cannot allocate memory to " 580 "transfer packets to peer."); 581 return (0); 582 } else { 583 xnbp->xnb_stat_small_allocation_success++; 584 return (mfn); 585 } 586 } 587 588 nth = 0; 589 xnbp->xnb_stat_allocation_success++; 590 } 591 592 mfn = mfns[nth++]; 593 mutex_exit(&xnb_alloc_page_lock); 594 595 ASSERT(mfn != 0); 596 597 return (mfn); 598 #undef BATCH_SIZE 599 #undef WARNING_RATE_LIMIT 600 } 601 602 /*ARGSUSED*/ 603 static void 604 xnb_free_page(xnb_t *xnbp, mfn_t mfn) 605 { 606 int r; 607 pfn_t pfn; 608 609 pfn = xen_assign_pfn(mfn); 610 pfnzero(pfn, 0, PAGESIZE); 611 xen_release_pfn(pfn); 612 613 /* 614 * This happens only in the error path, so batching is 615 * not worth the complication. 616 */ 617 if ((r = balloon_free_pages(1, &mfn, NULL, NULL)) != 1) { 618 cmn_err(CE_WARN, "free_page: cannot decrease memory " 619 "reservation (%d): page kept but unusable (mfn = 0x%lx).", 620 r, mfn); 621 } 622 } 623 624 /* 625 * Similar to RING_HAS_UNCONSUMED_REQUESTS(&xnbp->rx_ring) but 626 * using local variables. 627 */ 628 #define XNB_RING_HAS_UNCONSUMED_REQUESTS(_r) \ 629 ((((_r)->sring->req_prod - loop) < \ 630 (RING_SIZE(_r) - (loop - prod))) ? \ 631 ((_r)->sring->req_prod - loop) : \ 632 (RING_SIZE(_r) - (loop - prod))) 633 634 mblk_t * 635 xnb_to_peer(xnb_t *xnbp, mblk_t *mp) 636 { 637 mblk_t *free = mp, *prev = NULL; 638 size_t len; 639 gnttab_transfer_t *gop; 640 boolean_t notify; 641 RING_IDX loop, prod, end; 642 643 /* 644 * For each packet the sequence of operations is: 645 * 646 * 1. get a new page from the hypervisor. 647 * 2. get a request slot from the ring. 648 * 3. copy the data into the new page. 649 * 4. transfer the page to the peer. 650 * 5. update the request slot. 651 * 6. kick the peer. 652 * 7. free mp. 653 * 654 * In order to reduce the number of hypercalls, we prepare 655 * several packets for the peer and perform a single hypercall 656 * to transfer them. 657 */ 658 659 mutex_enter(&xnbp->xnb_rx_lock); 660 661 /* 662 * If we are not connected to the peer or have not yet 663 * finished hotplug it is too early to pass packets to the 664 * peer. 665 */ 666 if (!(xnbp->xnb_connected && xnbp->xnb_hotplugged)) { 667 mutex_exit(&xnbp->xnb_rx_lock); 668 DTRACE_PROBE(flip_rx_too_early); 669 xnbp->xnb_stat_rx_too_early++; 670 return (mp); 671 } 672 673 loop = xnbp->xnb_rx_ring.req_cons; 674 prod = xnbp->xnb_rx_ring.rsp_prod_pvt; 675 gop = xnbp->xnb_rx_top; 676 677 while ((mp != NULL) && 678 XNB_RING_HAS_UNCONSUMED_REQUESTS(&xnbp->xnb_rx_ring)) { 679 680 mfn_t mfn; 681 pfn_t pfn; 682 netif_rx_request_t *rxreq; 683 netif_rx_response_t *rxresp; 684 char *valoop; 685 size_t offset; 686 mblk_t *ml; 687 uint16_t cksum_flags; 688 689 /* 1 */ 690 if ((mfn = xnb_alloc_page(xnbp)) == 0) { 691 xnbp->xnb_stat_rx_defer++; 692 break; 693 } 694 695 /* 2 */ 696 rxreq = RING_GET_REQUEST(&xnbp->xnb_rx_ring, loop); 697 698 #ifdef XNB_DEBUG 699 if (!(rxreq->id < NET_RX_RING_SIZE)) 700 cmn_err(CE_PANIC, "xnb_to_peer: " 701 "id %d out of range in request 0x%p", 702 rxreq->id, (void *)rxreq); 703 #endif /* XNB_DEBUG */ 704 705 /* Assign a pfn and map the new page at the allocated va. */ 706 pfn = xen_assign_pfn(mfn); 707 hat_devload(kas.a_hat, xnbp->xnb_rx_va, PAGESIZE, 708 pfn, PROT_READ | PROT_WRITE, HAT_LOAD); 709 710 offset = RX_BUFFER_HEADROOM; 711 712 /* 3 */ 713 len = 0; 714 valoop = xnbp->xnb_rx_va + offset; 715 for (ml = mp; ml != NULL; ml = ml->b_cont) { 716 size_t chunk = ml->b_wptr - ml->b_rptr; 717 718 bcopy(ml->b_rptr, valoop, chunk); 719 valoop += chunk; 720 len += chunk; 721 } 722 723 ASSERT(len + offset < PAGESIZE); 724 725 /* Release the pfn. */ 726 hat_unload(kas.a_hat, xnbp->xnb_rx_va, PAGESIZE, 727 HAT_UNLOAD_UNMAP); 728 xen_release_pfn(pfn); 729 730 /* 4 */ 731 gop->mfn = mfn; 732 gop->domid = xnbp->xnb_peer; 733 gop->ref = rxreq->gref; 734 735 /* 5.1 */ 736 rxresp = RING_GET_RESPONSE(&xnbp->xnb_rx_ring, prod); 737 rxresp->offset = offset; 738 rxresp->flags = 0; 739 740 cksum_flags = xnbp->xnb_flavour->xf_cksum_to_peer(xnbp, mp); 741 if (cksum_flags != 0) 742 xnbp->xnb_stat_rx_cksum_deferred++; 743 rxresp->flags |= cksum_flags; 744 745 rxresp->id = RING_GET_REQUEST(&xnbp->xnb_rx_ring, prod)->id; 746 rxresp->status = len; 747 748 loop++; 749 prod++; 750 gop++; 751 prev = mp; 752 mp = mp->b_next; 753 } 754 755 /* 756 * Did we actually do anything? 757 */ 758 if (loop == xnbp->xnb_rx_ring.req_cons) { 759 mutex_exit(&xnbp->xnb_rx_lock); 760 return (mp); 761 } 762 763 end = loop; 764 765 /* 766 * Unlink the end of the 'done' list from the remainder. 767 */ 768 ASSERT(prev != NULL); 769 prev->b_next = NULL; 770 771 if (HYPERVISOR_grant_table_op(GNTTABOP_transfer, xnbp->xnb_rx_top, 772 loop - xnbp->xnb_rx_ring.req_cons) != 0) { 773 cmn_err(CE_WARN, "xnb_to_peer: transfer operation failed"); 774 } 775 776 loop = xnbp->xnb_rx_ring.req_cons; 777 prod = xnbp->xnb_rx_ring.rsp_prod_pvt; 778 gop = xnbp->xnb_rx_top; 779 780 while (loop < end) { 781 int16_t status = NETIF_RSP_OKAY; 782 783 if (gop->status != 0) { 784 status = NETIF_RSP_ERROR; 785 786 /* 787 * If the status is anything other than 788 * GNTST_bad_page then we don't own the page 789 * any more, so don't try to give it back. 790 */ 791 if (gop->status != GNTST_bad_page) 792 gop->mfn = 0; 793 } else { 794 /* The page is no longer ours. */ 795 gop->mfn = 0; 796 } 797 798 if (gop->mfn != 0) 799 /* 800 * Give back the page, as we won't be using 801 * it. 802 */ 803 xnb_free_page(xnbp, gop->mfn); 804 else 805 /* 806 * We gave away a page, update our accounting 807 * now. 808 */ 809 balloon_drv_subtracted(1); 810 811 /* 5.2 */ 812 if (status != NETIF_RSP_OKAY) { 813 RING_GET_RESPONSE(&xnbp->xnb_rx_ring, prod)->status = 814 status; 815 } else { 816 xnbp->xnb_stat_ipackets++; 817 xnbp->xnb_stat_rbytes += len; 818 } 819 820 loop++; 821 prod++; 822 gop++; 823 } 824 825 xnbp->xnb_rx_ring.req_cons = loop; 826 xnbp->xnb_rx_ring.rsp_prod_pvt = prod; 827 828 /* 6 */ 829 /* LINTED: constant in conditional context */ 830 RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&xnbp->xnb_rx_ring, notify); 831 if (notify) { 832 ec_notify_via_evtchn(xnbp->xnb_evtchn); 833 xnbp->xnb_stat_rx_notify_sent++; 834 } else { 835 xnbp->xnb_stat_rx_notify_deferred++; 836 } 837 838 if (mp != NULL) 839 xnbp->xnb_stat_rx_defer++; 840 841 mutex_exit(&xnbp->xnb_rx_lock); 842 843 /* Free mblk_t's that we consumed. */ 844 freemsgchain(free); 845 846 return (mp); 847 } 848 849 /* helper functions for xnb_copy_to_peer */ 850 851 /* 852 * Grow the array of copy operation descriptors. 853 * Returns a pointer to the next available entry. 854 */ 855 gnttab_copy_t * 856 grow_cpop_area(xnb_t *xnbp, gnttab_copy_t *o_cpop) 857 { 858 /* 859 * o_cpop (arg.1) is a ptr to the area we would like to copy 860 * something into but cannot, because we haven't alloc'ed it 861 * yet, or NULL. 862 * old_cpop and new_cpop (local) are pointers to old/new 863 * versions of xnbp->xnb_rx_cpop. 864 */ 865 gnttab_copy_t *new_cpop, *old_cpop, *ret_cpop; 866 size_t newcount; 867 868 ASSERT(MUTEX_HELD(&xnbp->xnb_rx_lock)); 869 870 old_cpop = xnbp->xnb_rx_cpop; 871 /* 872 * o_cpop is a pointer into the array pointed to by old_cpop; 873 * it would be an error for exactly one of these pointers to be NULL. 874 * We shouldn't call this function if xnb_rx_cpop has already 875 * been allocated, but we're starting to fill it from the beginning 876 * again. 877 */ 878 ASSERT((o_cpop == NULL && old_cpop == NULL) || 879 (o_cpop != NULL && old_cpop != NULL && o_cpop != old_cpop)); 880 881 newcount = xnbp->xnb_cpop_sz + CPOP_DEFCNT; 882 883 new_cpop = kmem_alloc(sizeof (*new_cpop) * newcount, KM_NOSLEEP); 884 if (new_cpop == NULL) { 885 xnbp->xnb_stat_other_allocation_failure++; 886 return (NULL); 887 } 888 889 if (o_cpop != NULL) { 890 size_t offset = (o_cpop - old_cpop); 891 892 /* we only need to move the parts in use ... */ 893 (void) memmove(new_cpop, old_cpop, xnbp->xnb_cpop_sz * 894 (sizeof (*old_cpop))); 895 896 kmem_free(old_cpop, xnbp->xnb_cpop_sz * sizeof (*old_cpop)); 897 898 ret_cpop = new_cpop + offset; 899 } else { 900 ret_cpop = new_cpop; 901 } 902 903 xnbp->xnb_rx_cpop = new_cpop; 904 xnbp->xnb_cpop_sz = newcount; 905 906 xnbp->xnb_stat_rx_cpoparea_grown++; 907 908 return (ret_cpop); 909 } 910 911 /* 912 * Check whether an address is on a page that's foreign to this domain. 913 */ 914 static boolean_t 915 is_foreign(void *addr) 916 { 917 pfn_t pfn = hat_getpfnum(kas.a_hat, addr); 918 919 return (pfn & PFN_IS_FOREIGN_MFN ? B_TRUE : B_FALSE); 920 } 921 922 /* 923 * Insert a newly allocated mblk into a chain, replacing the old one. 924 */ 925 static mblk_t * 926 replace_msg(mblk_t *mp, size_t len, mblk_t *mp_prev, mblk_t *ml_prev) 927 { 928 uint32_t start, stuff, end, value, flags; 929 mblk_t *new_mp; 930 931 new_mp = copyb(mp); 932 if (new_mp == NULL) 933 cmn_err(CE_PANIC, "replace_msg: cannot alloc new message" 934 "for %p, len %lu", (void *) mp, len); 935 936 hcksum_retrieve(mp, NULL, NULL, &start, &stuff, &end, &value, &flags); 937 (void) hcksum_assoc(new_mp, NULL, NULL, start, stuff, end, value, 938 flags, KM_NOSLEEP); 939 940 new_mp->b_next = mp->b_next; 941 new_mp->b_prev = mp->b_prev; 942 new_mp->b_cont = mp->b_cont; 943 944 /* Make sure we only overwrite pointers to the mblk being replaced. */ 945 if (mp_prev != NULL && mp_prev->b_next == mp) 946 mp_prev->b_next = new_mp; 947 948 if (ml_prev != NULL && ml_prev->b_cont == mp) 949 ml_prev->b_cont = new_mp; 950 951 mp->b_next = mp->b_prev = mp->b_cont = NULL; 952 freemsg(mp); 953 954 return (new_mp); 955 } 956 957 /* 958 * Set all the fields in a gnttab_copy_t. 959 */ 960 static void 961 setup_gop(xnb_t *xnbp, gnttab_copy_t *gp, uchar_t *rptr, 962 size_t s_off, size_t d_off, size_t len, grant_ref_t d_ref) 963 { 964 ASSERT(xnbp != NULL && gp != NULL); 965 966 gp->source.offset = s_off; 967 gp->source.u.gmfn = pfn_to_mfn(hat_getpfnum(kas.a_hat, (caddr_t)rptr)); 968 gp->source.domid = DOMID_SELF; 969 970 gp->len = (uint16_t)len; 971 gp->flags = GNTCOPY_dest_gref; 972 gp->status = 0; 973 974 gp->dest.u.ref = d_ref; 975 gp->dest.offset = d_off; 976 gp->dest.domid = xnbp->xnb_peer; 977 } 978 979 mblk_t * 980 xnb_copy_to_peer(xnb_t *xnbp, mblk_t *mp) 981 { 982 mblk_t *free = mp, *mp_prev = NULL, *saved_mp = mp; 983 mblk_t *ml, *ml_prev; 984 gnttab_copy_t *gop_cp; 985 boolean_t notify; 986 RING_IDX loop, prod; 987 int i; 988 989 if (!xnbp->xnb_hv_copy) 990 return (xnb_to_peer(xnbp, mp)); 991 992 /* 993 * For each packet the sequence of operations is: 994 * 995 * 1. get a request slot from the ring. 996 * 2. set up data for hypercall (see NOTE below) 997 * 3. have the hypervisore copy the data 998 * 4. update the request slot. 999 * 5. kick the peer. 1000 * 1001 * NOTE ad 2. 1002 * In order to reduce the number of hypercalls, we prepare 1003 * several packets (mp->b_cont != NULL) for the peer and 1004 * perform a single hypercall to transfer them. 1005 * We also have to set up a seperate copy operation for 1006 * every page. 1007 * 1008 * If we have more than one message (mp->b_next != NULL), 1009 * we do this whole dance repeatedly. 1010 */ 1011 1012 mutex_enter(&xnbp->xnb_rx_lock); 1013 1014 if (!(xnbp->xnb_connected && xnbp->xnb_hotplugged)) { 1015 mutex_exit(&xnbp->xnb_rx_lock); 1016 DTRACE_PROBE(copy_rx_too_early); 1017 xnbp->xnb_stat_rx_too_early++; 1018 return (mp); 1019 } 1020 1021 loop = xnbp->xnb_rx_ring.req_cons; 1022 prod = xnbp->xnb_rx_ring.rsp_prod_pvt; 1023 1024 while ((mp != NULL) && 1025 XNB_RING_HAS_UNCONSUMED_REQUESTS(&xnbp->xnb_rx_ring)) { 1026 netif_rx_request_t *rxreq; 1027 netif_rx_response_t *rxresp; 1028 size_t offset, d_offset; 1029 size_t len; 1030 uint16_t cksum_flags; 1031 int16_t status = NETIF_RSP_OKAY; 1032 int item_count; 1033 1034 /* 1 */ 1035 rxreq = RING_GET_REQUEST(&xnbp->xnb_rx_ring, loop); 1036 1037 #ifdef XNB_DEBUG 1038 if (!(rxreq->id < NET_RX_RING_SIZE)) 1039 cmn_err(CE_PANIC, "xnb_copy_to_peer: " 1040 "id %d out of range in request 0x%p", 1041 rxreq->id, (void *)rxreq); 1042 #endif /* XNB_DEBUG */ 1043 1044 /* 2 */ 1045 d_offset = offset = RX_BUFFER_HEADROOM; 1046 len = 0; 1047 item_count = 0; 1048 1049 gop_cp = xnbp->xnb_rx_cpop; 1050 1051 /* 1052 * We walk the b_cont pointers and set up a gop_cp 1053 * structure for every page in every data block we have. 1054 */ 1055 /* 2a */ 1056 for (ml = mp, ml_prev = NULL; ml != NULL; ml = ml->b_cont) { 1057 size_t chunk = ml->b_wptr - ml->b_rptr; 1058 uchar_t *r_tmp, *rpt_align; 1059 size_t r_offset; 1060 1061 /* 1062 * If we get an mblk on a page that doesn't belong to 1063 * this domain, get a new mblk to replace the old one. 1064 */ 1065 if (is_foreign(ml->b_rptr) || is_foreign(ml->b_wptr)) { 1066 mblk_t *ml_new = replace_msg(ml, chunk, 1067 mp_prev, ml_prev); 1068 1069 /* We can still use old ml, but not *ml! */ 1070 if (free == ml) 1071 free = ml_new; 1072 if (mp == ml) 1073 mp = ml_new; 1074 ml = ml_new; 1075 1076 xnbp->xnb_stat_rx_foreign_page++; 1077 } 1078 1079 rpt_align = (uchar_t *)ALIGN2PAGE(ml->b_rptr); 1080 r_offset = (uint16_t)(ml->b_rptr - rpt_align); 1081 r_tmp = ml->b_rptr; 1082 1083 if (d_offset + chunk > PAGESIZE) 1084 cmn_err(CE_PANIC, "xnb_copy_to_peer: mp %p " 1085 "(svd: %p), ml %p,rpt_alg. %p, d_offset " 1086 "(%lu) + chunk (%lu) > PAGESIZE %d!", 1087 (void *)mp, (void *)saved_mp, (void *)ml, 1088 (void *)rpt_align, 1089 d_offset, chunk, (int)PAGESIZE); 1090 1091 while (chunk > 0) { 1092 size_t part_len; 1093 1094 item_count++; 1095 if (item_count > xnbp->xnb_cpop_sz) { 1096 gop_cp = grow_cpop_area(xnbp, gop_cp); 1097 if (gop_cp == NULL) 1098 goto failure; 1099 } 1100 /* 1101 * If our mblk crosses a page boundary, we need 1102 * to do a seperate copy for every page. 1103 */ 1104 if (r_offset + chunk > PAGESIZE) { 1105 part_len = PAGESIZE - r_offset; 1106 1107 DTRACE_PROBE3(mblk_page_crossed, 1108 (mblk_t *), ml, int, chunk, int, 1109 (int)r_offset); 1110 1111 xnbp->xnb_stat_rx_pagebndry_crossed++; 1112 } else { 1113 part_len = chunk; 1114 } 1115 1116 setup_gop(xnbp, gop_cp, r_tmp, r_offset, 1117 d_offset, part_len, rxreq->gref); 1118 1119 chunk -= part_len; 1120 1121 len += part_len; 1122 d_offset += part_len; 1123 r_tmp += part_len; 1124 /* 1125 * The 2nd, 3rd ... last copies will always 1126 * start at r_tmp, therefore r_offset is 0. 1127 */ 1128 r_offset = 0; 1129 gop_cp++; 1130 } 1131 ml_prev = ml; 1132 DTRACE_PROBE4(mblk_loop_end, (mblk_t *), ml, int, 1133 chunk, int, len, int, item_count); 1134 } 1135 /* 3 */ 1136 if (HYPERVISOR_grant_table_op(GNTTABOP_copy, xnbp->xnb_rx_cpop, 1137 item_count) != 0) { 1138 cmn_err(CE_WARN, "xnb_copy_to_peer: copy op. failed"); 1139 DTRACE_PROBE(HV_granttableopfailed); 1140 } 1141 1142 /* 4 */ 1143 rxresp = RING_GET_RESPONSE(&xnbp->xnb_rx_ring, prod); 1144 rxresp->offset = offset; 1145 1146 rxresp->flags = 0; 1147 1148 DTRACE_PROBE4(got_RX_rsp, int, (int)rxresp->id, int, 1149 (int)rxresp->offset, int, (int)rxresp->flags, int, 1150 (int)rxresp->status); 1151 1152 cksum_flags = xnbp->xnb_flavour->xf_cksum_to_peer(xnbp, mp); 1153 if (cksum_flags != 0) 1154 xnbp->xnb_stat_rx_cksum_deferred++; 1155 rxresp->flags |= cksum_flags; 1156 1157 rxresp->id = RING_GET_REQUEST(&xnbp->xnb_rx_ring, prod)->id; 1158 rxresp->status = len; 1159 1160 DTRACE_PROBE4(RX_rsp_set, int, (int)rxresp->id, int, 1161 (int)rxresp->offset, int, (int)rxresp->flags, int, 1162 (int)rxresp->status); 1163 1164 for (i = 0; i < item_count; i++) { 1165 if (xnbp->xnb_rx_cpop[i].status != 0) { 1166 DTRACE_PROBE2(cpop__status__nonnull, int, 1167 (int)xnbp->xnb_rx_cpop[i].status, 1168 int, i); 1169 status = NETIF_RSP_ERROR; 1170 } 1171 } 1172 1173 /* 5.2 */ 1174 if (status != NETIF_RSP_OKAY) { 1175 RING_GET_RESPONSE(&xnbp->xnb_rx_ring, prod)->status = 1176 status; 1177 xnbp->xnb_stat_rx_rsp_notok++; 1178 } else { 1179 xnbp->xnb_stat_ipackets++; 1180 xnbp->xnb_stat_rbytes += len; 1181 } 1182 1183 loop++; 1184 prod++; 1185 mp_prev = mp; 1186 mp = mp->b_next; 1187 } 1188 failure: 1189 /* 1190 * Did we actually do anything? 1191 */ 1192 if (loop == xnbp->xnb_rx_ring.req_cons) { 1193 mutex_exit(&xnbp->xnb_rx_lock); 1194 return (mp); 1195 } 1196 1197 /* 1198 * Unlink the end of the 'done' list from the remainder. 1199 */ 1200 ASSERT(mp_prev != NULL); 1201 mp_prev->b_next = NULL; 1202 1203 xnbp->xnb_rx_ring.req_cons = loop; 1204 xnbp->xnb_rx_ring.rsp_prod_pvt = prod; 1205 1206 /* 6 */ 1207 /* LINTED: constant in conditional context */ 1208 RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&xnbp->xnb_rx_ring, notify); 1209 if (notify) { 1210 ec_notify_via_evtchn(xnbp->xnb_evtchn); 1211 xnbp->xnb_stat_rx_notify_sent++; 1212 } else { 1213 xnbp->xnb_stat_rx_notify_deferred++; 1214 } 1215 1216 if (mp != NULL) 1217 xnbp->xnb_stat_rx_defer++; 1218 1219 mutex_exit(&xnbp->xnb_rx_lock); 1220 1221 /* Free mblk_t structs we have consumed. */ 1222 freemsgchain(free); 1223 1224 return (mp); 1225 } 1226 1227 /*ARGSUSED*/ 1228 static int 1229 xnb_txbuf_constructor(void *buf, void *arg, int kmflag) 1230 { 1231 xnb_txbuf_t *txp = buf; 1232 1233 bzero(txp, sizeof (*txp)); 1234 1235 txp->xt_free_rtn.free_func = xnb_tx_complete; 1236 txp->xt_free_rtn.free_arg = (caddr_t)txp; 1237 1238 txp->xt_mop.host_addr = 1239 (uint64_t)(uintptr_t)vmem_alloc(heap_arena, PAGESIZE, 1240 ((kmflag & KM_NOSLEEP) == KM_NOSLEEP) ? 1241 VM_NOSLEEP : VM_SLEEP); 1242 1243 if (txp->xt_mop.host_addr == NULL) { 1244 cmn_err(CE_WARN, "xnb_txbuf_constructor: " 1245 "cannot get address space"); 1246 return (-1); 1247 } 1248 1249 /* 1250 * Have the hat ensure that page table exists for the VA. 1251 */ 1252 hat_prepare_mapping(kas.a_hat, 1253 (caddr_t)(uintptr_t)txp->xt_mop.host_addr, NULL); 1254 1255 return (0); 1256 } 1257 1258 /*ARGSUSED*/ 1259 static void 1260 xnb_txbuf_destructor(void *buf, void *arg) 1261 { 1262 xnb_txbuf_t *txp = buf; 1263 1264 ASSERT(txp->xt_mop.host_addr != NULL); 1265 ASSERT((txp->xt_flags & XNB_TXBUF_INUSE) == 0); 1266 1267 hat_release_mapping(kas.a_hat, 1268 (caddr_t)(uintptr_t)txp->xt_mop.host_addr); 1269 vmem_free(heap_arena, 1270 (caddr_t)(uintptr_t)txp->xt_mop.host_addr, PAGESIZE); 1271 } 1272 1273 static void 1274 xnb_tx_notify_peer(xnb_t *xnbp) 1275 { 1276 boolean_t notify; 1277 1278 ASSERT(MUTEX_HELD(&xnbp->xnb_tx_lock)); 1279 1280 /* LINTED: constant in conditional context */ 1281 RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&xnbp->xnb_tx_ring, notify); 1282 if (notify) { 1283 ec_notify_via_evtchn(xnbp->xnb_evtchn); 1284 xnbp->xnb_stat_tx_notify_sent++; 1285 } else { 1286 xnbp->xnb_stat_tx_notify_deferred++; 1287 } 1288 } 1289 1290 static void 1291 xnb_tx_complete(xnb_txbuf_t *txp) 1292 { 1293 xnb_t *xnbp = txp->xt_xnbp; 1294 1295 ASSERT((txp->xt_flags & XNB_TXBUF_INUSE) == XNB_TXBUF_INUSE); 1296 1297 mutex_enter(&xnbp->xnb_tx_lock); 1298 xnb_tx_schedule_unmop(xnbp, &txp->xt_mop, txp); 1299 mutex_exit(&xnbp->xnb_tx_lock); 1300 } 1301 1302 static void 1303 xnb_tx_mark_complete(xnb_t *xnbp, RING_IDX id, int16_t status) 1304 { 1305 RING_IDX i; 1306 netif_tx_response_t *txresp; 1307 1308 ASSERT(MUTEX_HELD(&xnbp->xnb_tx_lock)); 1309 1310 i = xnbp->xnb_tx_ring.rsp_prod_pvt; 1311 1312 txresp = RING_GET_RESPONSE(&xnbp->xnb_tx_ring, i); 1313 txresp->id = id; 1314 txresp->status = status; 1315 1316 xnbp->xnb_tx_ring.rsp_prod_pvt = i + 1; 1317 1318 /* 1319 * Note that we don't push the change to the peer here - that 1320 * is the callers responsibility. 1321 */ 1322 } 1323 1324 static void 1325 xnb_tx_schedule_unmop(xnb_t *xnbp, gnttab_map_grant_ref_t *mop, 1326 xnb_txbuf_t *txp) 1327 { 1328 gnttab_unmap_grant_ref_t *unmop; 1329 int u_count; 1330 int reqs_on_ring; 1331 1332 ASSERT(MUTEX_HELD(&xnbp->xnb_tx_lock)); 1333 ASSERT(xnbp->xnb_tx_unmop_count < NET_TX_RING_SIZE); 1334 1335 u_count = xnbp->xnb_tx_unmop_count++; 1336 1337 /* Cache data for the time when we actually unmap grant refs */ 1338 xnbp->xnb_tx_unmop_txp[u_count] = txp; 1339 1340 unmop = &xnbp->xnb_tx_unmop[u_count]; 1341 unmop->host_addr = mop->host_addr; 1342 unmop->dev_bus_addr = mop->dev_bus_addr; 1343 unmop->handle = mop->handle; 1344 1345 /* 1346 * We cannot check the ring once we're disconnected from it. Batching 1347 * doesn't seem to be a useful optimisation in this case either, 1348 * so we directly call into the actual unmap function. 1349 */ 1350 if (xnbp->xnb_connected) { 1351 reqs_on_ring = RING_HAS_UNCONSUMED_REQUESTS(&xnbp->xnb_tx_ring); 1352 1353 /* 1354 * By tuning xnb_unmop_hiwat to N, we can emulate "N per batch" 1355 * or (with N == 1) "immediate unmop" behaviour. 1356 * The "> xnb_unmop_lowwat" is a guard against ring exhaustion. 1357 */ 1358 if (xnbp->xnb_tx_unmop_count < xnb_unmop_hiwat && 1359 reqs_on_ring > xnb_unmop_lowwat) 1360 return; 1361 } 1362 1363 xnb_tx_perform_pending_unmop(xnbp); 1364 } 1365 1366 /* 1367 * Here we perform the actual unmapping of the data that was 1368 * accumulated in xnb_tx_schedule_unmop(). 1369 * Note that it is the caller's responsibility to make sure that 1370 * there's actually something there to unmop. 1371 */ 1372 static void 1373 xnb_tx_perform_pending_unmop(xnb_t *xnbp) 1374 { 1375 RING_IDX loop; 1376 #ifdef XNB_DEBUG 1377 gnttab_unmap_grant_ref_t *unmop; 1378 #endif /* XNB_DEBUG */ 1379 1380 ASSERT(MUTEX_HELD(&xnbp->xnb_tx_lock)); 1381 ASSERT(xnbp->xnb_tx_unmop_count > 0); 1382 1383 if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, 1384 xnbp->xnb_tx_unmop, xnbp->xnb_tx_unmop_count) < 0) { 1385 cmn_err(CE_WARN, "xnb_tx_perform_pending_unmop: " 1386 "unmap grant operation failed, " 1387 "%d pages lost", xnbp->xnb_tx_unmop_count); 1388 } 1389 1390 #ifdef XNB_DEBUG 1391 for (loop = 0, unmop = xnbp->xnb_tx_unmop; 1392 loop < xnbp->xnb_tx_unmop_count; 1393 loop++, unmop++) { 1394 if (unmop->status != 0) { 1395 cmn_err(CE_WARN, "xnb_tx_perform_pending_unmop: " 1396 "unmap grant reference failed (%d)", 1397 unmop->status); 1398 } 1399 } 1400 #endif /* XNB_DEBUG */ 1401 1402 for (loop = 0; loop < xnbp->xnb_tx_unmop_count; loop++) { 1403 xnb_txbuf_t *txp = xnbp->xnb_tx_unmop_txp[loop]; 1404 1405 if (txp == NULL) 1406 cmn_err(CE_PANIC, 1407 "xnb_tx_perform_pending_unmop: " 1408 "unexpected NULL txp (loop %d; count %d)!", 1409 loop, xnbp->xnb_tx_unmop_count); 1410 1411 if (xnbp->xnb_connected) 1412 xnb_tx_mark_complete(xnbp, txp->xt_id, txp->xt_status); 1413 xnb_txbuf_put(xnbp, txp); 1414 } 1415 if (xnbp->xnb_connected) 1416 xnb_tx_notify_peer(xnbp); 1417 1418 xnbp->xnb_tx_unmop_count = 0; 1419 1420 #ifdef XNB_DEBUG 1421 bzero(xnbp->xnb_tx_unmop, sizeof (xnbp->xnb_tx_unmop)); 1422 bzero(xnbp->xnb_tx_unmop_txp, sizeof (xnbp->xnb_tx_unmop_txp)); 1423 #endif /* XNB_DEBUG */ 1424 } 1425 1426 static xnb_txbuf_t * 1427 xnb_txbuf_get(xnb_t *xnbp, int flags) 1428 { 1429 xnb_txbuf_t *txp; 1430 1431 ASSERT(MUTEX_HELD(&xnbp->xnb_tx_lock)); 1432 1433 txp = kmem_cache_alloc(xnb_txbuf_cachep, flags); 1434 if (txp != NULL) { 1435 ASSERT((txp->xt_flags & XNB_TXBUF_INUSE) == 0); 1436 txp->xt_flags |= XNB_TXBUF_INUSE; 1437 1438 txp->xt_xnbp = xnbp; 1439 txp->xt_mop.dom = xnbp->xnb_peer; 1440 1441 txp->xt_mop.flags = GNTMAP_host_map; 1442 if (!xnbp->xnb_tx_pages_writable) 1443 txp->xt_mop.flags |= GNTMAP_readonly; 1444 1445 xnbp->xnb_tx_buf_count++; 1446 } 1447 1448 return (txp); 1449 } 1450 1451 static void 1452 xnb_txbuf_put(xnb_t *xnbp, xnb_txbuf_t *txp) 1453 { 1454 ASSERT(MUTEX_HELD(&xnbp->xnb_tx_lock)); 1455 ASSERT((txp->xt_flags & XNB_TXBUF_INUSE) == XNB_TXBUF_INUSE); 1456 1457 txp->xt_flags &= ~XNB_TXBUF_INUSE; 1458 xnbp->xnb_tx_buf_count--; 1459 1460 kmem_cache_free(xnb_txbuf_cachep, txp); 1461 } 1462 1463 static mblk_t * 1464 xnb_from_peer(xnb_t *xnbp) 1465 { 1466 RING_IDX start, end, loop; 1467 gnttab_map_grant_ref_t *mop; 1468 xnb_txbuf_t **txpp; 1469 netif_tx_request_t *txreq; 1470 boolean_t work_to_do; 1471 mblk_t *head, *tail; 1472 /* 1473 * If the peer granted a read-only mapping to the page then we 1474 * must copy the data, as the local protocol stack (should the 1475 * packet be destined for this host) will modify the packet 1476 * 'in place'. 1477 */ 1478 boolean_t copy = xnbp->xnb_tx_always_copy || 1479 !xnbp->xnb_tx_pages_writable; 1480 1481 /* 1482 * For each individual request, the sequence of actions is: 1483 * 1484 * 1. get the request. 1485 * 2. map the page based on the grant ref. 1486 * 3. allocate an mblk, copy the data to it. 1487 * 4. release the grant. 1488 * 5. update the ring. 1489 * 6. pass the packet upward. 1490 * 7. kick the peer. 1491 * 1492 * In fact, we try to perform the grant operations in batches, 1493 * so there are two loops. 1494 */ 1495 1496 head = tail = NULL; 1497 around: 1498 ASSERT(MUTEX_HELD(&xnbp->xnb_tx_lock)); 1499 1500 /* LINTED: constant in conditional context */ 1501 RING_FINAL_CHECK_FOR_REQUESTS(&xnbp->xnb_tx_ring, work_to_do); 1502 if (!work_to_do) { 1503 finished: 1504 return (head); 1505 } 1506 1507 start = xnbp->xnb_tx_ring.req_cons; 1508 end = xnbp->xnb_tx_ring.sring->req_prod; 1509 1510 if ((end - start) > NET_TX_RING_SIZE) { 1511 /* 1512 * This usually indicates that the frontend driver is 1513 * misbehaving, as it's not possible to have more than 1514 * NET_TX_RING_SIZE ring elements in play at any one 1515 * time. 1516 * 1517 * We reset the ring pointers to the state declared by 1518 * the frontend and try to carry on. 1519 */ 1520 cmn_err(CE_WARN, "xnb_from_peer: domain %d tried to give us %u " 1521 "items in the ring, resetting and trying to recover.", 1522 xnbp->xnb_peer, (end - start)); 1523 1524 /* LINTED: constant in conditional context */ 1525 BACK_RING_ATTACH(&xnbp->xnb_tx_ring, 1526 (netif_tx_sring_t *)xnbp->xnb_tx_ring_addr, PAGESIZE); 1527 1528 goto around; 1529 } 1530 1531 for (loop = start, mop = xnbp->xnb_tx_mop, txpp = xnbp->xnb_tx_bufp; 1532 loop != end; 1533 loop++, mop++, txpp++) { 1534 xnb_txbuf_t *txp; 1535 1536 txp = xnb_txbuf_get(xnbp, KM_NOSLEEP); 1537 if (txp == NULL) 1538 break; 1539 1540 ASSERT(xnbp->xnb_tx_pages_writable || 1541 ((txp->xt_mop.flags & GNTMAP_readonly) 1542 == GNTMAP_readonly)); 1543 1544 txp->xt_mop.ref = 1545 RING_GET_REQUEST(&xnbp->xnb_tx_ring, loop)->gref; 1546 1547 *mop = txp->xt_mop; 1548 *txpp = txp; 1549 } 1550 1551 if ((loop - start) == 0) 1552 goto finished; 1553 1554 end = loop; 1555 1556 if (xen_map_gref(GNTTABOP_map_grant_ref, xnbp->xnb_tx_mop, 1557 end - start, B_FALSE) != 0) { 1558 1559 cmn_err(CE_WARN, "xnb_from_peer: map grant operation failed"); 1560 1561 loop = start; 1562 txpp = xnbp->xnb_tx_bufp; 1563 1564 while (loop != end) { 1565 xnb_txbuf_put(xnbp, *txpp); 1566 1567 loop++; 1568 txpp++; 1569 } 1570 1571 goto finished; 1572 } 1573 1574 for (loop = start, mop = xnbp->xnb_tx_mop, txpp = xnbp->xnb_tx_bufp; 1575 loop != end; 1576 loop++, mop++, txpp++) { 1577 mblk_t *mp = NULL; 1578 int16_t status = NETIF_RSP_OKAY; 1579 xnb_txbuf_t *txp = *txpp; 1580 1581 if (mop->status != 0) { 1582 cmn_err(CE_WARN, "xnb_from_peer: " 1583 "failed to map buffer: %d", 1584 mop->status); 1585 status = NETIF_RSP_ERROR; 1586 } 1587 1588 txreq = RING_GET_REQUEST(&xnbp->xnb_tx_ring, loop); 1589 1590 if (status == NETIF_RSP_OKAY) { 1591 if (copy) { 1592 mp = allocb(txreq->size, BPRI_MED); 1593 if (mp == NULL) { 1594 status = NETIF_RSP_ERROR; 1595 xnbp->xnb_stat_tx_allocb_failed++; 1596 } else { 1597 bcopy((caddr_t)(uintptr_t) 1598 mop->host_addr + txreq->offset, 1599 mp->b_wptr, txreq->size); 1600 mp->b_wptr += txreq->size; 1601 } 1602 } else { 1603 mp = desballoc((uchar_t *)(uintptr_t) 1604 mop->host_addr + txreq->offset, 1605 txreq->size, 0, &txp->xt_free_rtn); 1606 if (mp == NULL) { 1607 status = NETIF_RSP_ERROR; 1608 xnbp->xnb_stat_tx_allocb_failed++; 1609 } else { 1610 txp->xt_id = txreq->id; 1611 txp->xt_status = status; 1612 txp->xt_mop = *mop; 1613 1614 mp->b_wptr += txreq->size; 1615 } 1616 } 1617 1618 /* 1619 * If we have a buffer and there are checksum 1620 * flags, process them appropriately. 1621 */ 1622 if ((mp != NULL) && 1623 ((txreq->flags & 1624 (NETTXF_csum_blank | NETTXF_data_validated)) 1625 != 0)) { 1626 mp = xnbp->xnb_flavour->xf_cksum_from_peer(xnbp, 1627 mp, txreq->flags); 1628 xnbp->xnb_stat_tx_cksum_no_need++; 1629 } 1630 } 1631 1632 if (copy || (mp == NULL)) { 1633 txp->xt_status = status; 1634 txp->xt_id = txreq->id; 1635 xnb_tx_schedule_unmop(xnbp, mop, txp); 1636 } 1637 1638 if (mp != NULL) { 1639 xnbp->xnb_stat_opackets++; 1640 xnbp->xnb_stat_obytes += txreq->size; 1641 1642 mp->b_next = NULL; 1643 if (head == NULL) { 1644 ASSERT(tail == NULL); 1645 head = mp; 1646 } else { 1647 ASSERT(tail != NULL); 1648 tail->b_next = mp; 1649 } 1650 tail = mp; 1651 } 1652 } 1653 1654 xnbp->xnb_tx_ring.req_cons = loop; 1655 1656 goto around; 1657 /* NOTREACHED */ 1658 } 1659 1660 /* 1661 * intr() -- ring interrupt service routine 1662 */ 1663 static uint_t 1664 xnb_intr(caddr_t arg) 1665 { 1666 xnb_t *xnbp = (xnb_t *)arg; 1667 mblk_t *mp; 1668 1669 xnbp->xnb_stat_intr++; 1670 1671 mutex_enter(&xnbp->xnb_tx_lock); 1672 1673 ASSERT(xnbp->xnb_connected); 1674 1675 mp = xnb_from_peer(xnbp); 1676 1677 mutex_exit(&xnbp->xnb_tx_lock); 1678 1679 if (!xnbp->xnb_hotplugged) { 1680 xnbp->xnb_stat_tx_too_early++; 1681 goto fail; 1682 } 1683 if (mp == NULL) { 1684 xnbp->xnb_stat_spurious_intr++; 1685 goto fail; 1686 } 1687 1688 xnbp->xnb_flavour->xf_from_peer(xnbp, mp); 1689 1690 return (DDI_INTR_CLAIMED); 1691 1692 fail: 1693 freemsgchain(mp); 1694 return (DDI_INTR_CLAIMED); 1695 } 1696 1697 static boolean_t 1698 xnb_connect_rings(dev_info_t *dip) 1699 { 1700 xnb_t *xnbp = ddi_get_driver_private(dip); 1701 char *oename; 1702 struct gnttab_map_grant_ref map_op; 1703 evtchn_port_t evtchn; 1704 int i; 1705 1706 /* 1707 * Cannot attempt to connect the rings if already connected. 1708 */ 1709 ASSERT(!xnbp->xnb_connected); 1710 1711 oename = xvdi_get_oename(dip); 1712 1713 if (xenbus_gather(XBT_NULL, oename, 1714 "event-channel", "%u", &evtchn, 1715 "tx-ring-ref", "%lu", &xnbp->xnb_tx_ring_ref, 1716 "rx-ring-ref", "%lu", &xnbp->xnb_rx_ring_ref, 1717 NULL) != 0) { 1718 cmn_err(CE_WARN, "xnb_connect_rings: " 1719 "cannot read other-end details from %s", 1720 oename); 1721 goto fail; 1722 } 1723 1724 if (xenbus_scanf(XBT_NULL, oename, 1725 "feature-tx-writable", "%d", &i) != 0) 1726 i = 0; 1727 if (i != 0) 1728 xnbp->xnb_tx_pages_writable = B_TRUE; 1729 1730 if (xenbus_scanf(XBT_NULL, oename, 1731 "feature-no-csum-offload", "%d", &i) != 0) 1732 i = 0; 1733 if ((i == 1) || !xnbp->xnb_cksum_offload) 1734 xnbp->xnb_cksum_offload = B_FALSE; 1735 1736 /* Check whether our peer knows and requests hypervisor copy */ 1737 if (xenbus_scanf(XBT_NULL, oename, "request-rx-copy", "%d", &i) 1738 != 0) 1739 i = 0; 1740 if (i != 0) 1741 xnbp->xnb_hv_copy = B_TRUE; 1742 1743 /* 1744 * 1. allocate a vaddr for the tx page, one for the rx page. 1745 * 2. call GNTTABOP_map_grant_ref to map the relevant pages 1746 * into the allocated vaddr (one for tx, one for rx). 1747 * 3. call EVTCHNOP_bind_interdomain to have the event channel 1748 * bound to this domain. 1749 * 4. associate the event channel with an interrupt. 1750 * 5. declare ourselves connected. 1751 * 6. enable the interrupt. 1752 */ 1753 1754 /* 1.tx */ 1755 xnbp->xnb_tx_ring_addr = vmem_xalloc(heap_arena, PAGESIZE, PAGESIZE, 1756 0, 0, 0, 0, VM_SLEEP); 1757 ASSERT(xnbp->xnb_tx_ring_addr != NULL); 1758 1759 /* 2.tx */ 1760 map_op.host_addr = (uint64_t)((long)xnbp->xnb_tx_ring_addr); 1761 map_op.flags = GNTMAP_host_map; 1762 map_op.ref = xnbp->xnb_tx_ring_ref; 1763 map_op.dom = xnbp->xnb_peer; 1764 hat_prepare_mapping(kas.a_hat, xnbp->xnb_tx_ring_addr, NULL); 1765 if (xen_map_gref(GNTTABOP_map_grant_ref, &map_op, 1, B_FALSE) != 0 || 1766 map_op.status != 0) { 1767 cmn_err(CE_WARN, "xnb_connect_rings: cannot map tx-ring page."); 1768 goto fail; 1769 } 1770 xnbp->xnb_tx_ring_handle = map_op.handle; 1771 1772 /* LINTED: constant in conditional context */ 1773 BACK_RING_INIT(&xnbp->xnb_tx_ring, 1774 (netif_tx_sring_t *)xnbp->xnb_tx_ring_addr, PAGESIZE); 1775 1776 /* 1.rx */ 1777 xnbp->xnb_rx_ring_addr = vmem_xalloc(heap_arena, PAGESIZE, PAGESIZE, 1778 0, 0, 0, 0, VM_SLEEP); 1779 ASSERT(xnbp->xnb_rx_ring_addr != NULL); 1780 1781 /* 2.rx */ 1782 map_op.host_addr = (uint64_t)((long)xnbp->xnb_rx_ring_addr); 1783 map_op.flags = GNTMAP_host_map; 1784 map_op.ref = xnbp->xnb_rx_ring_ref; 1785 map_op.dom = xnbp->xnb_peer; 1786 hat_prepare_mapping(kas.a_hat, xnbp->xnb_rx_ring_addr, NULL); 1787 if (xen_map_gref(GNTTABOP_map_grant_ref, &map_op, 1, B_FALSE) != 0 || 1788 map_op.status != 0) { 1789 cmn_err(CE_WARN, "xnb_connect_rings: cannot map rx-ring page."); 1790 goto fail; 1791 } 1792 xnbp->xnb_rx_ring_handle = map_op.handle; 1793 1794 /* LINTED: constant in conditional context */ 1795 BACK_RING_INIT(&xnbp->xnb_rx_ring, 1796 (netif_rx_sring_t *)xnbp->xnb_rx_ring_addr, PAGESIZE); 1797 1798 /* 3 */ 1799 if (xvdi_bind_evtchn(dip, evtchn) != DDI_SUCCESS) { 1800 cmn_err(CE_WARN, "xnb_connect_rings: " 1801 "cannot bind event channel %d", xnbp->xnb_evtchn); 1802 xnbp->xnb_evtchn = INVALID_EVTCHN; 1803 goto fail; 1804 } 1805 xnbp->xnb_evtchn = xvdi_get_evtchn(dip); 1806 1807 /* 1808 * It would be good to set the state to XenbusStateConnected 1809 * here as well, but then what if ddi_add_intr() failed? 1810 * Changing the state in the store will be noticed by the peer 1811 * and cannot be "taken back". 1812 */ 1813 mutex_enter(&xnbp->xnb_tx_lock); 1814 mutex_enter(&xnbp->xnb_rx_lock); 1815 1816 /* 5.1 */ 1817 xnbp->xnb_connected = B_TRUE; 1818 1819 mutex_exit(&xnbp->xnb_rx_lock); 1820 mutex_exit(&xnbp->xnb_tx_lock); 1821 1822 /* 4, 6 */ 1823 if (ddi_add_intr(dip, 0, NULL, NULL, xnb_intr, (caddr_t)xnbp) 1824 != DDI_SUCCESS) { 1825 cmn_err(CE_WARN, "xnb_connect_rings: cannot add interrupt"); 1826 goto fail; 1827 } 1828 xnbp->xnb_irq = B_TRUE; 1829 1830 /* 5.2 */ 1831 (void) xvdi_switch_state(dip, XBT_NULL, XenbusStateConnected); 1832 1833 return (B_TRUE); 1834 1835 fail: 1836 mutex_enter(&xnbp->xnb_tx_lock); 1837 mutex_enter(&xnbp->xnb_rx_lock); 1838 1839 xnbp->xnb_connected = B_FALSE; 1840 mutex_exit(&xnbp->xnb_rx_lock); 1841 mutex_exit(&xnbp->xnb_tx_lock); 1842 1843 return (B_FALSE); 1844 } 1845 1846 static void 1847 xnb_disconnect_rings(dev_info_t *dip) 1848 { 1849 xnb_t *xnbp = ddi_get_driver_private(dip); 1850 1851 if (xnbp->xnb_irq) { 1852 ddi_remove_intr(dip, 0, NULL); 1853 xnbp->xnb_irq = B_FALSE; 1854 } 1855 1856 if (xnbp->xnb_tx_unmop_count > 0) 1857 xnb_tx_perform_pending_unmop(xnbp); 1858 1859 if (xnbp->xnb_evtchn != INVALID_EVTCHN) { 1860 xvdi_free_evtchn(dip); 1861 xnbp->xnb_evtchn = INVALID_EVTCHN; 1862 } 1863 1864 if (xnbp->xnb_rx_ring_handle != INVALID_GRANT_HANDLE) { 1865 struct gnttab_unmap_grant_ref unmap_op; 1866 1867 unmap_op.host_addr = (uint64_t)(uintptr_t) 1868 xnbp->xnb_rx_ring_addr; 1869 unmap_op.dev_bus_addr = 0; 1870 unmap_op.handle = xnbp->xnb_rx_ring_handle; 1871 if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, 1872 &unmap_op, 1) != 0) 1873 cmn_err(CE_WARN, "xnb_disconnect_rings: " 1874 "cannot unmap rx-ring page (%d)", 1875 unmap_op.status); 1876 1877 xnbp->xnb_rx_ring_handle = INVALID_GRANT_HANDLE; 1878 } 1879 1880 if (xnbp->xnb_rx_ring_addr != NULL) { 1881 hat_release_mapping(kas.a_hat, xnbp->xnb_rx_ring_addr); 1882 vmem_free(heap_arena, xnbp->xnb_rx_ring_addr, PAGESIZE); 1883 xnbp->xnb_rx_ring_addr = NULL; 1884 } 1885 1886 if (xnbp->xnb_tx_ring_handle != INVALID_GRANT_HANDLE) { 1887 struct gnttab_unmap_grant_ref unmap_op; 1888 1889 unmap_op.host_addr = (uint64_t)(uintptr_t) 1890 xnbp->xnb_tx_ring_addr; 1891 unmap_op.dev_bus_addr = 0; 1892 unmap_op.handle = xnbp->xnb_tx_ring_handle; 1893 if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, 1894 &unmap_op, 1) != 0) 1895 cmn_err(CE_WARN, "xnb_disconnect_rings: " 1896 "cannot unmap tx-ring page (%d)", 1897 unmap_op.status); 1898 1899 xnbp->xnb_tx_ring_handle = INVALID_GRANT_HANDLE; 1900 } 1901 1902 if (xnbp->xnb_tx_ring_addr != NULL) { 1903 hat_release_mapping(kas.a_hat, xnbp->xnb_tx_ring_addr); 1904 vmem_free(heap_arena, xnbp->xnb_tx_ring_addr, PAGESIZE); 1905 xnbp->xnb_tx_ring_addr = NULL; 1906 } 1907 } 1908 1909 /*ARGSUSED*/ 1910 static void 1911 xnb_oe_state_change(dev_info_t *dip, ddi_eventcookie_t id, 1912 void *arg, void *impl_data) 1913 { 1914 xnb_t *xnbp = ddi_get_driver_private(dip); 1915 XenbusState new_state = *(XenbusState *)impl_data; 1916 1917 ASSERT(xnbp != NULL); 1918 1919 switch (new_state) { 1920 case XenbusStateConnected: 1921 /* spurious state change */ 1922 if (xnbp->xnb_connected) 1923 return; 1924 1925 if (xnb_connect_rings(dip)) { 1926 xnbp->xnb_flavour->xf_peer_connected(xnbp); 1927 } else { 1928 xnbp->xnb_flavour->xf_peer_disconnected(xnbp); 1929 xnb_disconnect_rings(dip); 1930 (void) xvdi_switch_state(dip, XBT_NULL, 1931 XenbusStateClosed); 1932 (void) xvdi_post_event(dip, XEN_HP_REMOVE); 1933 } 1934 1935 /* 1936 * Now that we've attempted to connect it's reasonable 1937 * to allow an attempt to detach. 1938 */ 1939 xnbp->xnb_detachable = B_TRUE; 1940 1941 break; 1942 1943 case XenbusStateClosing: 1944 (void) xvdi_switch_state(dip, XBT_NULL, XenbusStateClosing); 1945 1946 break; 1947 1948 case XenbusStateClosed: 1949 xnbp->xnb_flavour->xf_peer_disconnected(xnbp); 1950 1951 mutex_enter(&xnbp->xnb_tx_lock); 1952 mutex_enter(&xnbp->xnb_rx_lock); 1953 1954 xnb_disconnect_rings(dip); 1955 xnbp->xnb_connected = B_FALSE; 1956 1957 mutex_exit(&xnbp->xnb_rx_lock); 1958 mutex_exit(&xnbp->xnb_tx_lock); 1959 1960 (void) xvdi_switch_state(dip, XBT_NULL, XenbusStateClosed); 1961 (void) xvdi_post_event(dip, XEN_HP_REMOVE); 1962 /* 1963 * In all likelyhood this is already set (in the above 1964 * case), but if the peer never attempted to connect 1965 * and the domain is destroyed we get here without 1966 * having been through the case above, so we set it to 1967 * be sure. 1968 */ 1969 xnbp->xnb_detachable = B_TRUE; 1970 1971 break; 1972 1973 default: 1974 break; 1975 } 1976 } 1977 1978 /*ARGSUSED*/ 1979 static void 1980 xnb_hp_state_change(dev_info_t *dip, ddi_eventcookie_t id, 1981 void *arg, void *impl_data) 1982 { 1983 xnb_t *xnbp = ddi_get_driver_private(dip); 1984 xendev_hotplug_state_t state = *(xendev_hotplug_state_t *)impl_data; 1985 boolean_t success; 1986 1987 ASSERT(xnbp != NULL); 1988 1989 switch (state) { 1990 case Connected: 1991 1992 /* spurious hotplug event */ 1993 if (xnbp->xnb_hotplugged) 1994 return; 1995 1996 success = xnbp->xnb_flavour->xf_hotplug_connected(xnbp); 1997 1998 mutex_enter(&xnbp->xnb_tx_lock); 1999 mutex_enter(&xnbp->xnb_rx_lock); 2000 2001 xnbp->xnb_hotplugged = success; 2002 2003 mutex_exit(&xnbp->xnb_rx_lock); 2004 mutex_exit(&xnbp->xnb_tx_lock); 2005 break; 2006 2007 default: 2008 break; 2009 } 2010 } 2011 2012 static struct modldrv modldrv = { 2013 &mod_miscops, "xnb", 2014 }; 2015 2016 static struct modlinkage modlinkage = { 2017 MODREV_1, &modldrv, NULL 2018 }; 2019 2020 int 2021 _init(void) 2022 { 2023 int i; 2024 2025 mutex_init(&xnb_alloc_page_lock, NULL, MUTEX_DRIVER, NULL); 2026 2027 xnb_txbuf_cachep = kmem_cache_create("xnb_txbuf_cachep", 2028 sizeof (xnb_txbuf_t), 0, xnb_txbuf_constructor, 2029 xnb_txbuf_destructor, NULL, NULL, NULL, 0); 2030 ASSERT(xnb_txbuf_cachep != NULL); 2031 2032 i = mod_install(&modlinkage); 2033 if (i != DDI_SUCCESS) { 2034 kmem_cache_destroy(xnb_txbuf_cachep); 2035 mutex_destroy(&xnb_alloc_page_lock); 2036 } 2037 return (i); 2038 } 2039 2040 int 2041 _info(struct modinfo *modinfop) 2042 { 2043 return (mod_info(&modlinkage, modinfop)); 2044 } 2045 2046 int 2047 _fini(void) 2048 { 2049 int i; 2050 2051 i = mod_remove(&modlinkage); 2052 if (i == DDI_SUCCESS) { 2053 kmem_cache_destroy(xnb_txbuf_cachep); 2054 mutex_destroy(&xnb_alloc_page_lock); 2055 } 2056 return (i); 2057 } 2058