1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #ifdef DEBUG 28 #define XNB_DEBUG 1 29 #endif /* DEBUG */ 30 31 #include "xnb.h" 32 33 #include <sys/sunddi.h> 34 #include <sys/sunndi.h> 35 #include <sys/modctl.h> 36 #include <sys/conf.h> 37 #include <sys/mac.h> 38 #include <sys/mac_impl.h> /* XXXXBOW - remove, included for mac_fix_cksum() */ 39 #include <sys/dlpi.h> 40 #include <sys/strsubr.h> 41 #include <sys/strsun.h> 42 #include <sys/types.h> 43 #include <sys/pattr.h> 44 #include <vm/seg_kmem.h> 45 #include <vm/hat_i86.h> 46 #include <xen/sys/xenbus_impl.h> 47 #include <xen/sys/xendev.h> 48 #include <sys/balloon_impl.h> 49 #include <sys/evtchn_impl.h> 50 #include <sys/gnttab.h> 51 #include <vm/vm_dep.h> 52 53 #include <sys/gld.h> 54 #include <inet/ip.h> 55 #include <inet/ip_impl.h> 56 #include <sys/vnic_impl.h> /* blech. */ 57 58 /* 59 * The terms "transmit" and "receive" are used in alignment with domU, 60 * which means that packets originating from the peer domU are "transmitted" 61 * to other parts of the system and packets are "received" from them. 62 */ 63 64 /* 65 * XXPV dme: things to do, as well as various things indicated 66 * throughout the source: 67 * - copy avoidance outbound. 68 * - copy avoidance inbound. 69 * - transfer credit limiting. 70 * - MAC address based filtering. 71 */ 72 73 /* 74 * Linux expects to have some headroom in received buffers. The Linux 75 * frontend driver (netfront) checks to see if the headroom is 76 * available and will re-allocate the buffer to make room if 77 * necessary. To avoid this we add RX_BUFFER_HEADROOM bytes of 78 * headroom to each packet we pass to the peer. 79 */ 80 #define RX_BUFFER_HEADROOM 16 81 82 /* 83 * Should we attempt to defer checksum calculation? 84 */ 85 static boolean_t xnb_cksum_offload = B_TRUE; 86 /* 87 * When receiving packets from a guest, should they be copied 88 * or used as-is (esballoc)? 89 */ 90 static boolean_t xnb_tx_always_copy = B_TRUE; 91 92 static boolean_t xnb_connect_rings(dev_info_t *); 93 static void xnb_disconnect_rings(dev_info_t *); 94 static void xnb_oe_state_change(dev_info_t *, ddi_eventcookie_t, 95 void *, void *); 96 static void xnb_hp_state_change(dev_info_t *, ddi_eventcookie_t, 97 void *, void *); 98 99 static int xnb_txbuf_constructor(void *, void *, int); 100 static void xnb_txbuf_destructor(void *, void *); 101 static xnb_txbuf_t *xnb_txbuf_get(xnb_t *, int); 102 static void xnb_txbuf_put(xnb_t *, xnb_txbuf_t *); 103 static void xnb_tx_notify_peer(xnb_t *); 104 static void xnb_tx_complete(xnb_txbuf_t *); 105 static void xnb_tx_mark_complete(xnb_t *, RING_IDX, int16_t); 106 static void xnb_tx_schedule_unmop(xnb_t *, gnttab_map_grant_ref_t *, 107 xnb_txbuf_t *); 108 static void xnb_tx_perform_pending_unmop(xnb_t *); 109 mblk_t *xnb_copy_to_peer(xnb_t *, mblk_t *); 110 111 int xnb_unmop_lowwat = NET_TX_RING_SIZE >> 2; 112 int xnb_unmop_hiwat = NET_TX_RING_SIZE - (NET_TX_RING_SIZE >> 2); 113 114 115 boolean_t xnb_hv_copy = B_TRUE; 116 boolean_t xnb_explicit_pageflip_set = B_FALSE; 117 118 /* XXPV dme: are these really invalid? */ 119 #define INVALID_GRANT_HANDLE ((grant_handle_t)-1) 120 #define INVALID_GRANT_REF ((grant_ref_t)-1) 121 122 static kmem_cache_t *xnb_txbuf_cachep; 123 static kmutex_t xnb_alloc_page_lock; 124 125 /* 126 * Statistics. 127 */ 128 static char *aux_statistics[] = { 129 "rx_cksum_deferred", 130 "tx_cksum_no_need", 131 "rx_rsp_notok", 132 "tx_notify_deferred", 133 "tx_notify_sent", 134 "rx_notify_deferred", 135 "rx_notify_sent", 136 "tx_too_early", 137 "rx_too_early", 138 "rx_allocb_failed", 139 "tx_allocb_failed", 140 "rx_foreign_page", 141 "mac_full", 142 "spurious_intr", 143 "allocation_success", 144 "allocation_failure", 145 "small_allocation_success", 146 "small_allocation_failure", 147 "other_allocation_failure", 148 "rx_pageboundary_crossed", 149 "rx_cpoparea_grown", 150 "csum_hardware", 151 "csum_software", 152 }; 153 154 static int 155 xnb_ks_aux_update(kstat_t *ksp, int flag) 156 { 157 xnb_t *xnbp; 158 kstat_named_t *knp; 159 160 if (flag != KSTAT_READ) 161 return (EACCES); 162 163 xnbp = ksp->ks_private; 164 knp = ksp->ks_data; 165 166 /* 167 * Assignment order should match that of the names in 168 * aux_statistics. 169 */ 170 (knp++)->value.ui64 = xnbp->xnb_stat_rx_cksum_deferred; 171 (knp++)->value.ui64 = xnbp->xnb_stat_tx_cksum_no_need; 172 (knp++)->value.ui64 = xnbp->xnb_stat_rx_rsp_notok; 173 (knp++)->value.ui64 = xnbp->xnb_stat_tx_notify_deferred; 174 (knp++)->value.ui64 = xnbp->xnb_stat_tx_notify_sent; 175 (knp++)->value.ui64 = xnbp->xnb_stat_rx_notify_deferred; 176 (knp++)->value.ui64 = xnbp->xnb_stat_rx_notify_sent; 177 (knp++)->value.ui64 = xnbp->xnb_stat_tx_too_early; 178 (knp++)->value.ui64 = xnbp->xnb_stat_rx_too_early; 179 (knp++)->value.ui64 = xnbp->xnb_stat_rx_allocb_failed; 180 (knp++)->value.ui64 = xnbp->xnb_stat_tx_allocb_failed; 181 (knp++)->value.ui64 = xnbp->xnb_stat_rx_foreign_page; 182 (knp++)->value.ui64 = xnbp->xnb_stat_mac_full; 183 (knp++)->value.ui64 = xnbp->xnb_stat_spurious_intr; 184 (knp++)->value.ui64 = xnbp->xnb_stat_allocation_success; 185 (knp++)->value.ui64 = xnbp->xnb_stat_allocation_failure; 186 (knp++)->value.ui64 = xnbp->xnb_stat_small_allocation_success; 187 (knp++)->value.ui64 = xnbp->xnb_stat_small_allocation_failure; 188 (knp++)->value.ui64 = xnbp->xnb_stat_other_allocation_failure; 189 (knp++)->value.ui64 = xnbp->xnb_stat_rx_pagebndry_crossed; 190 (knp++)->value.ui64 = xnbp->xnb_stat_rx_cpoparea_grown; 191 (knp++)->value.ui64 = xnbp->xnb_stat_csum_hardware; 192 (knp++)->value.ui64 = xnbp->xnb_stat_csum_software; 193 194 return (0); 195 } 196 197 static boolean_t 198 xnb_ks_init(xnb_t *xnbp) 199 { 200 int nstat = sizeof (aux_statistics) / 201 sizeof (aux_statistics[0]); 202 char **cp = aux_statistics; 203 kstat_named_t *knp; 204 205 /* 206 * Create and initialise kstats. 207 */ 208 xnbp->xnb_kstat_aux = kstat_create(ddi_driver_name(xnbp->xnb_devinfo), 209 ddi_get_instance(xnbp->xnb_devinfo), "aux_statistics", "net", 210 KSTAT_TYPE_NAMED, nstat, 0); 211 if (xnbp->xnb_kstat_aux == NULL) 212 return (B_FALSE); 213 214 xnbp->xnb_kstat_aux->ks_private = xnbp; 215 xnbp->xnb_kstat_aux->ks_update = xnb_ks_aux_update; 216 217 knp = xnbp->xnb_kstat_aux->ks_data; 218 while (nstat > 0) { 219 kstat_named_init(knp, *cp, KSTAT_DATA_UINT64); 220 221 knp++; 222 cp++; 223 nstat--; 224 } 225 226 kstat_install(xnbp->xnb_kstat_aux); 227 228 return (B_TRUE); 229 } 230 231 static void 232 xnb_ks_free(xnb_t *xnbp) 233 { 234 kstat_delete(xnbp->xnb_kstat_aux); 235 } 236 237 /* 238 * Software checksum calculation and insertion for an arbitrary packet. 239 */ 240 /*ARGSUSED*/ 241 static mblk_t * 242 xnb_software_csum(xnb_t *xnbp, mblk_t *mp) 243 { 244 /* 245 * XXPV dme: shouldn't rely on vnic_fix_cksum(), not least 246 * because it doesn't cover all of the interesting cases :-( 247 */ 248 (void) hcksum_assoc(mp, NULL, NULL, 0, 0, 0, 0, 249 HCK_FULLCKSUM, KM_NOSLEEP); 250 251 return (mac_fix_cksum(mp)); 252 } 253 254 mblk_t * 255 xnb_process_cksum_flags(xnb_t *xnbp, mblk_t *mp, uint32_t capab) 256 { 257 struct ether_header *ehp; 258 uint16_t sap; 259 uint32_t offset; 260 ipha_t *ipha; 261 262 ASSERT(mp->b_next == NULL); 263 264 /* 265 * Check that the packet is contained in a single mblk. In 266 * the "from peer" path this is true today, but will change 267 * when scatter gather support is added. In the "to peer" 268 * path we cannot be sure, but in most cases it will be true 269 * (in the xnbo case the packet has come from a MAC device 270 * which is unlikely to split packets). 271 */ 272 if (mp->b_cont != NULL) 273 goto software; 274 275 /* 276 * If the MAC has no hardware capability don't do any further 277 * checking. 278 */ 279 if (capab == 0) 280 goto software; 281 282 ASSERT(MBLKL(mp) >= sizeof (struct ether_header)); 283 ehp = (struct ether_header *)mp->b_rptr; 284 285 if (ntohs(ehp->ether_type) == VLAN_TPID) { 286 struct ether_vlan_header *evhp; 287 288 ASSERT(MBLKL(mp) >= sizeof (struct ether_vlan_header)); 289 evhp = (struct ether_vlan_header *)mp->b_rptr; 290 sap = ntohs(evhp->ether_type); 291 offset = sizeof (struct ether_vlan_header); 292 } else { 293 sap = ntohs(ehp->ether_type); 294 offset = sizeof (struct ether_header); 295 } 296 297 /* 298 * We only attempt to do IPv4 packets in hardware. 299 */ 300 if (sap != ETHERTYPE_IP) 301 goto software; 302 303 /* 304 * We know that this is an IPv4 packet. 305 */ 306 ipha = (ipha_t *)(mp->b_rptr + offset); 307 308 switch (ipha->ipha_protocol) { 309 case IPPROTO_TCP: 310 case IPPROTO_UDP: { 311 uint32_t start, length, stuff, cksum; 312 uint16_t *stuffp; 313 314 /* 315 * This is a TCP/IPv4 or UDP/IPv4 packet, for which we 316 * can use full IPv4 and partial checksum offload. 317 */ 318 if ((capab & (HCKSUM_INET_FULL_V4|HCKSUM_INET_PARTIAL)) == 0) 319 break; 320 321 start = IP_SIMPLE_HDR_LENGTH; 322 length = ntohs(ipha->ipha_length); 323 if (ipha->ipha_protocol == IPPROTO_TCP) { 324 stuff = start + TCP_CHECKSUM_OFFSET; 325 cksum = IP_TCP_CSUM_COMP; 326 } else { 327 stuff = start + UDP_CHECKSUM_OFFSET; 328 cksum = IP_UDP_CSUM_COMP; 329 } 330 stuffp = (uint16_t *)(mp->b_rptr + offset + stuff); 331 332 if (capab & HCKSUM_INET_FULL_V4) { 333 /* 334 * Some devices require that the checksum 335 * field of the packet is zero for full 336 * offload. 337 */ 338 *stuffp = 0; 339 340 (void) hcksum_assoc(mp, NULL, NULL, 341 0, 0, 0, 0, 342 HCK_FULLCKSUM, KM_NOSLEEP); 343 344 xnbp->xnb_stat_csum_hardware++; 345 346 return (mp); 347 } 348 349 if (capab & HCKSUM_INET_PARTIAL) { 350 if (*stuffp == 0) { 351 ipaddr_t src, dst; 352 353 /* 354 * Older Solaris guests don't insert 355 * the pseudo-header checksum, so we 356 * calculate it here. 357 */ 358 src = ipha->ipha_src; 359 dst = ipha->ipha_dst; 360 361 cksum += (dst >> 16) + (dst & 0xFFFF); 362 cksum += (src >> 16) + (src & 0xFFFF); 363 cksum += length - IP_SIMPLE_HDR_LENGTH; 364 365 cksum = (cksum >> 16) + (cksum & 0xFFFF); 366 cksum = (cksum >> 16) + (cksum & 0xFFFF); 367 368 ASSERT(cksum <= 0xFFFF); 369 370 *stuffp = (uint16_t)(cksum ? cksum : ~cksum); 371 } 372 373 (void) hcksum_assoc(mp, NULL, NULL, 374 start, stuff, length, 0, 375 HCK_PARTIALCKSUM, KM_NOSLEEP); 376 377 xnbp->xnb_stat_csum_hardware++; 378 379 return (mp); 380 } 381 382 /* NOTREACHED */ 383 break; 384 } 385 386 default: 387 /* Use software. */ 388 break; 389 } 390 391 software: 392 /* 393 * We are not able to use any offload so do the whole thing in 394 * software. 395 */ 396 xnbp->xnb_stat_csum_software++; 397 398 return (xnb_software_csum(xnbp, mp)); 399 } 400 401 int 402 xnb_attach(dev_info_t *dip, xnb_flavour_t *flavour, void *flavour_data) 403 { 404 xnb_t *xnbp; 405 char *xsname, mac[ETHERADDRL * 3]; 406 407 xnbp = kmem_zalloc(sizeof (*xnbp), KM_SLEEP); 408 409 xnbp->xnb_flavour = flavour; 410 xnbp->xnb_flavour_data = flavour_data; 411 xnbp->xnb_devinfo = dip; 412 xnbp->xnb_evtchn = INVALID_EVTCHN; 413 xnbp->xnb_irq = B_FALSE; 414 xnbp->xnb_tx_ring_handle = INVALID_GRANT_HANDLE; 415 xnbp->xnb_rx_ring_handle = INVALID_GRANT_HANDLE; 416 xnbp->xnb_cksum_offload = xnb_cksum_offload; 417 xnbp->xnb_connected = B_FALSE; 418 xnbp->xnb_hotplugged = B_FALSE; 419 xnbp->xnb_detachable = B_FALSE; 420 xnbp->xnb_peer = xvdi_get_oeid(dip); 421 xnbp->xnb_tx_pages_writable = B_FALSE; 422 xnbp->xnb_tx_always_copy = xnb_tx_always_copy; 423 424 xnbp->xnb_tx_buf_count = 0; 425 xnbp->xnb_tx_unmop_count = 0; 426 427 xnbp->xnb_hv_copy = B_FALSE; 428 429 xnbp->xnb_rx_va = vmem_alloc(heap_arena, PAGESIZE, VM_SLEEP); 430 ASSERT(xnbp->xnb_rx_va != NULL); 431 432 if (ddi_get_iblock_cookie(dip, 0, &xnbp->xnb_icookie) 433 != DDI_SUCCESS) 434 goto failure; 435 436 /* allocated on demand, when/if we enter xnb_copy_to_peer() */ 437 xnbp->xnb_rx_cpop = NULL; 438 xnbp->xnb_cpop_sz = 0; 439 440 mutex_init(&xnbp->xnb_tx_lock, NULL, MUTEX_DRIVER, 441 xnbp->xnb_icookie); 442 mutex_init(&xnbp->xnb_rx_lock, NULL, MUTEX_DRIVER, 443 xnbp->xnb_icookie); 444 445 /* set driver private pointer now */ 446 ddi_set_driver_private(dip, xnbp); 447 448 if (!xnb_ks_init(xnbp)) 449 goto failure_1; 450 451 /* 452 * Receive notification of changes in the state of the 453 * driver in the guest domain. 454 */ 455 if (xvdi_add_event_handler(dip, XS_OE_STATE, xnb_oe_state_change, 456 NULL) != DDI_SUCCESS) 457 goto failure_2; 458 459 /* 460 * Receive notification of hotplug events. 461 */ 462 if (xvdi_add_event_handler(dip, XS_HP_STATE, xnb_hp_state_change, 463 NULL) != DDI_SUCCESS) 464 goto failure_2; 465 466 xsname = xvdi_get_xsname(dip); 467 468 if (xenbus_printf(XBT_NULL, xsname, 469 "feature-no-csum-offload", "%d", 470 xnbp->xnb_cksum_offload ? 0 : 1) != 0) 471 goto failure_3; 472 473 /* 474 * Use global xnb_hv_copy to export this feature. This means that 475 * we have to decide what to do before starting up a guest domain 476 */ 477 if (xenbus_printf(XBT_NULL, xsname, 478 "feature-rx-copy", "%d", xnb_hv_copy ? 1 : 0) != 0) 479 goto failure_3; 480 /* 481 * Linux domUs seem to depend on "feature-rx-flip" being 0 482 * in addition to "feature-rx-copy" being 1. It seems strange 483 * to use four possible states to describe a binary decision, 484 * but we might as well play nice. 485 */ 486 if (xenbus_printf(XBT_NULL, xsname, 487 "feature-rx-flip", "%d", xnb_explicit_pageflip_set ? 1 : 0) != 0) 488 goto failure_3; 489 490 if (xenbus_scanf(XBT_NULL, xsname, 491 "mac", "%s", mac) != 0) { 492 cmn_err(CE_WARN, "xnb_attach: " 493 "cannot read mac address from %s", 494 xsname); 495 goto failure_3; 496 } 497 498 if (ether_aton(mac, xnbp->xnb_mac_addr) != ETHERADDRL) { 499 cmn_err(CE_WARN, 500 "xnb_attach: cannot parse mac address %s", 501 mac); 502 goto failure_3; 503 } 504 505 (void) xvdi_switch_state(dip, XBT_NULL, XenbusStateInitWait); 506 (void) xvdi_post_event(dip, XEN_HP_ADD); 507 508 return (DDI_SUCCESS); 509 510 failure_3: 511 xvdi_remove_event_handler(dip, NULL); 512 513 failure_2: 514 xnb_ks_free(xnbp); 515 516 failure_1: 517 mutex_destroy(&xnbp->xnb_rx_lock); 518 mutex_destroy(&xnbp->xnb_tx_lock); 519 520 failure: 521 vmem_free(heap_arena, xnbp->xnb_rx_va, PAGESIZE); 522 kmem_free(xnbp, sizeof (*xnbp)); 523 return (DDI_FAILURE); 524 } 525 526 /*ARGSUSED*/ 527 void 528 xnb_detach(dev_info_t *dip) 529 { 530 xnb_t *xnbp = ddi_get_driver_private(dip); 531 532 ASSERT(xnbp != NULL); 533 ASSERT(!xnbp->xnb_connected); 534 ASSERT(xnbp->xnb_tx_buf_count == 0); 535 536 xnb_disconnect_rings(dip); 537 538 xvdi_remove_event_handler(dip, NULL); 539 540 xnb_ks_free(xnbp); 541 542 ddi_set_driver_private(dip, NULL); 543 544 mutex_destroy(&xnbp->xnb_tx_lock); 545 mutex_destroy(&xnbp->xnb_rx_lock); 546 547 if (xnbp->xnb_cpop_sz > 0) 548 kmem_free(xnbp->xnb_rx_cpop, sizeof (*xnbp->xnb_rx_cpop) 549 * xnbp->xnb_cpop_sz); 550 551 ASSERT(xnbp->xnb_rx_va != NULL); 552 vmem_free(heap_arena, xnbp->xnb_rx_va, PAGESIZE); 553 554 kmem_free(xnbp, sizeof (*xnbp)); 555 } 556 557 558 static mfn_t 559 xnb_alloc_page(xnb_t *xnbp) 560 { 561 #define WARNING_RATE_LIMIT 100 562 #define BATCH_SIZE 256 563 static mfn_t mfns[BATCH_SIZE]; /* common across all instances */ 564 static int nth = BATCH_SIZE; 565 mfn_t mfn; 566 567 mutex_enter(&xnb_alloc_page_lock); 568 if (nth == BATCH_SIZE) { 569 if (balloon_alloc_pages(BATCH_SIZE, mfns) != BATCH_SIZE) { 570 xnbp->xnb_stat_allocation_failure++; 571 mutex_exit(&xnb_alloc_page_lock); 572 573 /* 574 * Try for a single page in low memory situations. 575 */ 576 if (balloon_alloc_pages(1, &mfn) != 1) { 577 if ((xnbp->xnb_stat_small_allocation_failure++ 578 % WARNING_RATE_LIMIT) == 0) 579 cmn_err(CE_WARN, "xnb_alloc_page: " 580 "Cannot allocate memory to " 581 "transfer packets to peer."); 582 return (0); 583 } else { 584 xnbp->xnb_stat_small_allocation_success++; 585 return (mfn); 586 } 587 } 588 589 nth = 0; 590 xnbp->xnb_stat_allocation_success++; 591 } 592 593 mfn = mfns[nth++]; 594 mutex_exit(&xnb_alloc_page_lock); 595 596 ASSERT(mfn != 0); 597 598 return (mfn); 599 #undef BATCH_SIZE 600 #undef WARNING_RATE_LIMIT 601 } 602 603 /*ARGSUSED*/ 604 static void 605 xnb_free_page(xnb_t *xnbp, mfn_t mfn) 606 { 607 int r; 608 pfn_t pfn; 609 610 pfn = xen_assign_pfn(mfn); 611 pfnzero(pfn, 0, PAGESIZE); 612 xen_release_pfn(pfn); 613 614 /* 615 * This happens only in the error path, so batching is 616 * not worth the complication. 617 */ 618 if ((r = balloon_free_pages(1, &mfn, NULL, NULL)) != 1) { 619 cmn_err(CE_WARN, "free_page: cannot decrease memory " 620 "reservation (%d): page kept but unusable (mfn = 0x%lx).", 621 r, mfn); 622 } 623 } 624 625 /* 626 * Similar to RING_HAS_UNCONSUMED_REQUESTS(&xnbp->rx_ring) but 627 * using local variables. 628 */ 629 #define XNB_RING_HAS_UNCONSUMED_REQUESTS(_r) \ 630 ((((_r)->sring->req_prod - loop) < \ 631 (RING_SIZE(_r) - (loop - prod))) ? \ 632 ((_r)->sring->req_prod - loop) : \ 633 (RING_SIZE(_r) - (loop - prod))) 634 635 mblk_t * 636 xnb_to_peer(xnb_t *xnbp, mblk_t *mp) 637 { 638 mblk_t *free = mp, *prev = NULL; 639 size_t len; 640 gnttab_transfer_t *gop; 641 boolean_t notify; 642 RING_IDX loop, prod, end; 643 644 /* 645 * For each packet the sequence of operations is: 646 * 647 * 1. get a new page from the hypervisor. 648 * 2. get a request slot from the ring. 649 * 3. copy the data into the new page. 650 * 4. transfer the page to the peer. 651 * 5. update the request slot. 652 * 6. kick the peer. 653 * 7. free mp. 654 * 655 * In order to reduce the number of hypercalls, we prepare 656 * several packets for the peer and perform a single hypercall 657 * to transfer them. 658 */ 659 660 mutex_enter(&xnbp->xnb_rx_lock); 661 662 /* 663 * If we are not connected to the peer or have not yet 664 * finished hotplug it is too early to pass packets to the 665 * peer. 666 */ 667 if (!(xnbp->xnb_connected && xnbp->xnb_hotplugged)) { 668 mutex_exit(&xnbp->xnb_rx_lock); 669 DTRACE_PROBE(flip_rx_too_early); 670 xnbp->xnb_stat_rx_too_early++; 671 return (mp); 672 } 673 674 loop = xnbp->xnb_rx_ring.req_cons; 675 prod = xnbp->xnb_rx_ring.rsp_prod_pvt; 676 gop = xnbp->xnb_rx_top; 677 678 while ((mp != NULL) && 679 XNB_RING_HAS_UNCONSUMED_REQUESTS(&xnbp->xnb_rx_ring)) { 680 681 mfn_t mfn; 682 pfn_t pfn; 683 netif_rx_request_t *rxreq; 684 netif_rx_response_t *rxresp; 685 char *valoop; 686 size_t offset; 687 mblk_t *ml; 688 uint16_t cksum_flags; 689 690 /* 1 */ 691 if ((mfn = xnb_alloc_page(xnbp)) == 0) { 692 xnbp->xnb_stat_rx_defer++; 693 break; 694 } 695 696 /* 2 */ 697 rxreq = RING_GET_REQUEST(&xnbp->xnb_rx_ring, loop); 698 699 #ifdef XNB_DEBUG 700 if (!(rxreq->id < NET_RX_RING_SIZE)) 701 cmn_err(CE_PANIC, "xnb_to_peer: " 702 "id %d out of range in request 0x%p", 703 rxreq->id, (void *)rxreq); 704 #endif /* XNB_DEBUG */ 705 706 /* Assign a pfn and map the new page at the allocated va. */ 707 pfn = xen_assign_pfn(mfn); 708 hat_devload(kas.a_hat, xnbp->xnb_rx_va, PAGESIZE, 709 pfn, PROT_READ | PROT_WRITE, HAT_LOAD); 710 711 offset = RX_BUFFER_HEADROOM; 712 713 /* 3 */ 714 len = 0; 715 valoop = xnbp->xnb_rx_va + offset; 716 for (ml = mp; ml != NULL; ml = ml->b_cont) { 717 size_t chunk = ml->b_wptr - ml->b_rptr; 718 719 bcopy(ml->b_rptr, valoop, chunk); 720 valoop += chunk; 721 len += chunk; 722 } 723 724 ASSERT(len + offset < PAGESIZE); 725 726 /* Release the pfn. */ 727 hat_unload(kas.a_hat, xnbp->xnb_rx_va, PAGESIZE, 728 HAT_UNLOAD_UNMAP); 729 xen_release_pfn(pfn); 730 731 /* 4 */ 732 gop->mfn = mfn; 733 gop->domid = xnbp->xnb_peer; 734 gop->ref = rxreq->gref; 735 736 /* 5.1 */ 737 rxresp = RING_GET_RESPONSE(&xnbp->xnb_rx_ring, prod); 738 rxresp->offset = offset; 739 rxresp->flags = 0; 740 741 cksum_flags = xnbp->xnb_flavour->xf_cksum_to_peer(xnbp, mp); 742 if (cksum_flags != 0) 743 xnbp->xnb_stat_rx_cksum_deferred++; 744 rxresp->flags |= cksum_flags; 745 746 rxresp->id = RING_GET_REQUEST(&xnbp->xnb_rx_ring, prod)->id; 747 rxresp->status = len; 748 749 loop++; 750 prod++; 751 gop++; 752 prev = mp; 753 mp = mp->b_next; 754 } 755 756 /* 757 * Did we actually do anything? 758 */ 759 if (loop == xnbp->xnb_rx_ring.req_cons) { 760 mutex_exit(&xnbp->xnb_rx_lock); 761 return (mp); 762 } 763 764 end = loop; 765 766 /* 767 * Unlink the end of the 'done' list from the remainder. 768 */ 769 ASSERT(prev != NULL); 770 prev->b_next = NULL; 771 772 if (HYPERVISOR_grant_table_op(GNTTABOP_transfer, xnbp->xnb_rx_top, 773 loop - xnbp->xnb_rx_ring.req_cons) != 0) { 774 cmn_err(CE_WARN, "xnb_to_peer: transfer operation failed"); 775 } 776 777 loop = xnbp->xnb_rx_ring.req_cons; 778 prod = xnbp->xnb_rx_ring.rsp_prod_pvt; 779 gop = xnbp->xnb_rx_top; 780 781 while (loop < end) { 782 int16_t status = NETIF_RSP_OKAY; 783 784 if (gop->status != 0) { 785 status = NETIF_RSP_ERROR; 786 787 /* 788 * If the status is anything other than 789 * GNTST_bad_page then we don't own the page 790 * any more, so don't try to give it back. 791 */ 792 if (gop->status != GNTST_bad_page) 793 gop->mfn = 0; 794 } else { 795 /* The page is no longer ours. */ 796 gop->mfn = 0; 797 } 798 799 if (gop->mfn != 0) 800 /* 801 * Give back the page, as we won't be using 802 * it. 803 */ 804 xnb_free_page(xnbp, gop->mfn); 805 else 806 /* 807 * We gave away a page, update our accounting 808 * now. 809 */ 810 balloon_drv_subtracted(1); 811 812 /* 5.2 */ 813 if (status != NETIF_RSP_OKAY) { 814 RING_GET_RESPONSE(&xnbp->xnb_rx_ring, prod)->status = 815 status; 816 } else { 817 xnbp->xnb_stat_ipackets++; 818 xnbp->xnb_stat_rbytes += len; 819 } 820 821 loop++; 822 prod++; 823 gop++; 824 } 825 826 xnbp->xnb_rx_ring.req_cons = loop; 827 xnbp->xnb_rx_ring.rsp_prod_pvt = prod; 828 829 /* 6 */ 830 /* LINTED: constant in conditional context */ 831 RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&xnbp->xnb_rx_ring, notify); 832 if (notify) { 833 ec_notify_via_evtchn(xnbp->xnb_evtchn); 834 xnbp->xnb_stat_rx_notify_sent++; 835 } else { 836 xnbp->xnb_stat_rx_notify_deferred++; 837 } 838 839 if (mp != NULL) 840 xnbp->xnb_stat_rx_defer++; 841 842 mutex_exit(&xnbp->xnb_rx_lock); 843 844 /* Free mblk_t's that we consumed. */ 845 freemsgchain(free); 846 847 return (mp); 848 } 849 850 /* helper functions for xnb_copy_to_peer */ 851 852 /* 853 * Grow the array of copy operation descriptors. 854 * Returns a pointer to the next available entry. 855 */ 856 gnttab_copy_t * 857 grow_cpop_area(xnb_t *xnbp, gnttab_copy_t *o_cpop) 858 { 859 /* 860 * o_cpop (arg.1) is a ptr to the area we would like to copy 861 * something into but cannot, because we haven't alloc'ed it 862 * yet, or NULL. 863 * old_cpop and new_cpop (local) are pointers to old/new 864 * versions of xnbp->xnb_rx_cpop. 865 */ 866 gnttab_copy_t *new_cpop, *old_cpop, *ret_cpop; 867 size_t newcount; 868 869 ASSERT(MUTEX_HELD(&xnbp->xnb_rx_lock)); 870 871 old_cpop = xnbp->xnb_rx_cpop; 872 /* 873 * o_cpop is a pointer into the array pointed to by old_cpop; 874 * it would be an error for exactly one of these pointers to be NULL. 875 * We shouldn't call this function if xnb_rx_cpop has already 876 * been allocated, but we're starting to fill it from the beginning 877 * again. 878 */ 879 ASSERT((o_cpop == NULL && old_cpop == NULL) || 880 (o_cpop != NULL && old_cpop != NULL && o_cpop != old_cpop)); 881 882 newcount = xnbp->xnb_cpop_sz + CPOP_DEFCNT; 883 884 new_cpop = kmem_alloc(sizeof (*new_cpop) * newcount, KM_NOSLEEP); 885 if (new_cpop == NULL) { 886 xnbp->xnb_stat_other_allocation_failure++; 887 return (NULL); 888 } 889 890 if (o_cpop != NULL) { 891 size_t offset = (o_cpop - old_cpop); 892 893 /* we only need to move the parts in use ... */ 894 (void) memmove(new_cpop, old_cpop, xnbp->xnb_cpop_sz * 895 (sizeof (*old_cpop))); 896 897 kmem_free(old_cpop, xnbp->xnb_cpop_sz * sizeof (*old_cpop)); 898 899 ret_cpop = new_cpop + offset; 900 } else { 901 ret_cpop = new_cpop; 902 } 903 904 xnbp->xnb_rx_cpop = new_cpop; 905 xnbp->xnb_cpop_sz = newcount; 906 907 xnbp->xnb_stat_rx_cpoparea_grown++; 908 909 return (ret_cpop); 910 } 911 912 /* 913 * Check whether an address is on a page that's foreign to this domain. 914 */ 915 static boolean_t 916 is_foreign(void *addr) 917 { 918 pfn_t pfn = hat_getpfnum(kas.a_hat, addr); 919 920 return (pfn & PFN_IS_FOREIGN_MFN ? B_TRUE : B_FALSE); 921 } 922 923 /* 924 * Insert a newly allocated mblk into a chain, replacing the old one. 925 */ 926 static mblk_t * 927 replace_msg(mblk_t *mp, size_t len, mblk_t *mp_prev, mblk_t *ml_prev) 928 { 929 uint32_t start, stuff, end, value, flags; 930 mblk_t *new_mp; 931 932 new_mp = copyb(mp); 933 if (new_mp == NULL) 934 cmn_err(CE_PANIC, "replace_msg: cannot alloc new message" 935 "for %p, len %lu", (void *) mp, len); 936 937 hcksum_retrieve(mp, NULL, NULL, &start, &stuff, &end, &value, &flags); 938 (void) hcksum_assoc(new_mp, NULL, NULL, start, stuff, end, value, 939 flags, KM_NOSLEEP); 940 941 new_mp->b_next = mp->b_next; 942 new_mp->b_prev = mp->b_prev; 943 new_mp->b_cont = mp->b_cont; 944 945 /* Make sure we only overwrite pointers to the mblk being replaced. */ 946 if (mp_prev != NULL && mp_prev->b_next == mp) 947 mp_prev->b_next = new_mp; 948 949 if (ml_prev != NULL && ml_prev->b_cont == mp) 950 ml_prev->b_cont = new_mp; 951 952 mp->b_next = mp->b_prev = mp->b_cont = NULL; 953 freemsg(mp); 954 955 return (new_mp); 956 } 957 958 /* 959 * Set all the fields in a gnttab_copy_t. 960 */ 961 static void 962 setup_gop(xnb_t *xnbp, gnttab_copy_t *gp, uchar_t *rptr, 963 size_t s_off, size_t d_off, size_t len, grant_ref_t d_ref) 964 { 965 ASSERT(xnbp != NULL && gp != NULL); 966 967 gp->source.offset = s_off; 968 gp->source.u.gmfn = pfn_to_mfn(hat_getpfnum(kas.a_hat, (caddr_t)rptr)); 969 gp->source.domid = DOMID_SELF; 970 971 gp->len = (uint16_t)len; 972 gp->flags = GNTCOPY_dest_gref; 973 gp->status = 0; 974 975 gp->dest.u.ref = d_ref; 976 gp->dest.offset = d_off; 977 gp->dest.domid = xnbp->xnb_peer; 978 } 979 980 mblk_t * 981 xnb_copy_to_peer(xnb_t *xnbp, mblk_t *mp) 982 { 983 mblk_t *free = mp, *mp_prev = NULL, *saved_mp = mp; 984 mblk_t *ml, *ml_prev; 985 gnttab_copy_t *gop_cp; 986 boolean_t notify; 987 RING_IDX loop, prod; 988 int i; 989 990 if (!xnbp->xnb_hv_copy) 991 return (xnb_to_peer(xnbp, mp)); 992 993 /* 994 * For each packet the sequence of operations is: 995 * 996 * 1. get a request slot from the ring. 997 * 2. set up data for hypercall (see NOTE below) 998 * 3. have the hypervisore copy the data 999 * 4. update the request slot. 1000 * 5. kick the peer. 1001 * 1002 * NOTE ad 2. 1003 * In order to reduce the number of hypercalls, we prepare 1004 * several packets (mp->b_cont != NULL) for the peer and 1005 * perform a single hypercall to transfer them. 1006 * We also have to set up a seperate copy operation for 1007 * every page. 1008 * 1009 * If we have more than one message (mp->b_next != NULL), 1010 * we do this whole dance repeatedly. 1011 */ 1012 1013 mutex_enter(&xnbp->xnb_rx_lock); 1014 1015 if (!(xnbp->xnb_connected && xnbp->xnb_hotplugged)) { 1016 mutex_exit(&xnbp->xnb_rx_lock); 1017 DTRACE_PROBE(copy_rx_too_early); 1018 xnbp->xnb_stat_rx_too_early++; 1019 return (mp); 1020 } 1021 1022 loop = xnbp->xnb_rx_ring.req_cons; 1023 prod = xnbp->xnb_rx_ring.rsp_prod_pvt; 1024 1025 while ((mp != NULL) && 1026 XNB_RING_HAS_UNCONSUMED_REQUESTS(&xnbp->xnb_rx_ring)) { 1027 netif_rx_request_t *rxreq; 1028 netif_rx_response_t *rxresp; 1029 size_t offset, d_offset; 1030 size_t len; 1031 uint16_t cksum_flags; 1032 int16_t status = NETIF_RSP_OKAY; 1033 int item_count; 1034 1035 /* 1 */ 1036 rxreq = RING_GET_REQUEST(&xnbp->xnb_rx_ring, loop); 1037 1038 #ifdef XNB_DEBUG 1039 if (!(rxreq->id < NET_RX_RING_SIZE)) 1040 cmn_err(CE_PANIC, "xnb_copy_to_peer: " 1041 "id %d out of range in request 0x%p", 1042 rxreq->id, (void *)rxreq); 1043 #endif /* XNB_DEBUG */ 1044 1045 /* 2 */ 1046 d_offset = offset = RX_BUFFER_HEADROOM; 1047 len = 0; 1048 item_count = 0; 1049 1050 gop_cp = xnbp->xnb_rx_cpop; 1051 1052 /* 1053 * We walk the b_cont pointers and set up a gop_cp 1054 * structure for every page in every data block we have. 1055 */ 1056 /* 2a */ 1057 for (ml = mp, ml_prev = NULL; ml != NULL; ml = ml->b_cont) { 1058 size_t chunk = ml->b_wptr - ml->b_rptr; 1059 uchar_t *r_tmp, *rpt_align; 1060 size_t r_offset; 1061 1062 /* 1063 * If we get an mblk on a page that doesn't belong to 1064 * this domain, get a new mblk to replace the old one. 1065 */ 1066 if (is_foreign(ml->b_rptr) || is_foreign(ml->b_wptr)) { 1067 mblk_t *ml_new = replace_msg(ml, chunk, 1068 mp_prev, ml_prev); 1069 1070 /* We can still use old ml, but not *ml! */ 1071 if (free == ml) 1072 free = ml_new; 1073 if (mp == ml) 1074 mp = ml_new; 1075 ml = ml_new; 1076 1077 xnbp->xnb_stat_rx_foreign_page++; 1078 } 1079 1080 rpt_align = (uchar_t *)ALIGN2PAGE(ml->b_rptr); 1081 r_offset = (uint16_t)(ml->b_rptr - rpt_align); 1082 r_tmp = ml->b_rptr; 1083 1084 if (d_offset + chunk > PAGESIZE) 1085 cmn_err(CE_PANIC, "xnb_copy_to_peer: mp %p " 1086 "(svd: %p), ml %p,rpt_alg. %p, d_offset " 1087 "(%lu) + chunk (%lu) > PAGESIZE %d!", 1088 (void *)mp, (void *)saved_mp, (void *)ml, 1089 (void *)rpt_align, 1090 d_offset, chunk, (int)PAGESIZE); 1091 1092 while (chunk > 0) { 1093 size_t part_len; 1094 1095 item_count++; 1096 if (item_count > xnbp->xnb_cpop_sz) { 1097 gop_cp = grow_cpop_area(xnbp, gop_cp); 1098 if (gop_cp == NULL) 1099 goto failure; 1100 } 1101 /* 1102 * If our mblk crosses a page boundary, we need 1103 * to do a seperate copy for every page. 1104 */ 1105 if (r_offset + chunk > PAGESIZE) { 1106 part_len = PAGESIZE - r_offset; 1107 1108 DTRACE_PROBE3(mblk_page_crossed, 1109 (mblk_t *), ml, int, chunk, int, 1110 (int)r_offset); 1111 1112 xnbp->xnb_stat_rx_pagebndry_crossed++; 1113 } else { 1114 part_len = chunk; 1115 } 1116 1117 setup_gop(xnbp, gop_cp, r_tmp, r_offset, 1118 d_offset, part_len, rxreq->gref); 1119 1120 chunk -= part_len; 1121 1122 len += part_len; 1123 d_offset += part_len; 1124 r_tmp += part_len; 1125 /* 1126 * The 2nd, 3rd ... last copies will always 1127 * start at r_tmp, therefore r_offset is 0. 1128 */ 1129 r_offset = 0; 1130 gop_cp++; 1131 } 1132 ml_prev = ml; 1133 DTRACE_PROBE4(mblk_loop_end, (mblk_t *), ml, int, 1134 chunk, int, len, int, item_count); 1135 } 1136 /* 3 */ 1137 if (HYPERVISOR_grant_table_op(GNTTABOP_copy, xnbp->xnb_rx_cpop, 1138 item_count) != 0) { 1139 cmn_err(CE_WARN, "xnb_copy_to_peer: copy op. failed"); 1140 DTRACE_PROBE(HV_granttableopfailed); 1141 } 1142 1143 /* 4 */ 1144 rxresp = RING_GET_RESPONSE(&xnbp->xnb_rx_ring, prod); 1145 rxresp->offset = offset; 1146 1147 rxresp->flags = 0; 1148 1149 DTRACE_PROBE4(got_RX_rsp, int, (int)rxresp->id, int, 1150 (int)rxresp->offset, int, (int)rxresp->flags, int, 1151 (int)rxresp->status); 1152 1153 cksum_flags = xnbp->xnb_flavour->xf_cksum_to_peer(xnbp, mp); 1154 if (cksum_flags != 0) 1155 xnbp->xnb_stat_rx_cksum_deferred++; 1156 rxresp->flags |= cksum_flags; 1157 1158 rxresp->id = RING_GET_REQUEST(&xnbp->xnb_rx_ring, prod)->id; 1159 rxresp->status = len; 1160 1161 DTRACE_PROBE4(RX_rsp_set, int, (int)rxresp->id, int, 1162 (int)rxresp->offset, int, (int)rxresp->flags, int, 1163 (int)rxresp->status); 1164 1165 for (i = 0; i < item_count; i++) { 1166 if (xnbp->xnb_rx_cpop[i].status != 0) { 1167 DTRACE_PROBE2(cpop__status__nonnull, int, 1168 (int)xnbp->xnb_rx_cpop[i].status, 1169 int, i); 1170 status = NETIF_RSP_ERROR; 1171 } 1172 } 1173 1174 /* 5.2 */ 1175 if (status != NETIF_RSP_OKAY) { 1176 RING_GET_RESPONSE(&xnbp->xnb_rx_ring, prod)->status = 1177 status; 1178 xnbp->xnb_stat_rx_rsp_notok++; 1179 } else { 1180 xnbp->xnb_stat_ipackets++; 1181 xnbp->xnb_stat_rbytes += len; 1182 } 1183 1184 loop++; 1185 prod++; 1186 mp_prev = mp; 1187 mp = mp->b_next; 1188 } 1189 failure: 1190 /* 1191 * Did we actually do anything? 1192 */ 1193 if (loop == xnbp->xnb_rx_ring.req_cons) { 1194 mutex_exit(&xnbp->xnb_rx_lock); 1195 return (mp); 1196 } 1197 1198 /* 1199 * Unlink the end of the 'done' list from the remainder. 1200 */ 1201 ASSERT(mp_prev != NULL); 1202 mp_prev->b_next = NULL; 1203 1204 xnbp->xnb_rx_ring.req_cons = loop; 1205 xnbp->xnb_rx_ring.rsp_prod_pvt = prod; 1206 1207 /* 6 */ 1208 /* LINTED: constant in conditional context */ 1209 RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&xnbp->xnb_rx_ring, notify); 1210 if (notify) { 1211 ec_notify_via_evtchn(xnbp->xnb_evtchn); 1212 xnbp->xnb_stat_rx_notify_sent++; 1213 } else { 1214 xnbp->xnb_stat_rx_notify_deferred++; 1215 } 1216 1217 if (mp != NULL) 1218 xnbp->xnb_stat_rx_defer++; 1219 1220 mutex_exit(&xnbp->xnb_rx_lock); 1221 1222 /* Free mblk_t structs we have consumed. */ 1223 freemsgchain(free); 1224 1225 return (mp); 1226 } 1227 1228 /*ARGSUSED*/ 1229 static int 1230 xnb_txbuf_constructor(void *buf, void *arg, int kmflag) 1231 { 1232 xnb_txbuf_t *txp = buf; 1233 1234 bzero(txp, sizeof (*txp)); 1235 1236 txp->xt_free_rtn.free_func = xnb_tx_complete; 1237 txp->xt_free_rtn.free_arg = (caddr_t)txp; 1238 1239 txp->xt_mop.host_addr = 1240 (uint64_t)(uintptr_t)vmem_alloc(heap_arena, PAGESIZE, 1241 ((kmflag & KM_NOSLEEP) == KM_NOSLEEP) ? 1242 VM_NOSLEEP : VM_SLEEP); 1243 1244 if (txp->xt_mop.host_addr == NULL) { 1245 cmn_err(CE_WARN, "xnb_txbuf_constructor: " 1246 "cannot get address space"); 1247 return (-1); 1248 } 1249 1250 /* 1251 * Have the hat ensure that page table exists for the VA. 1252 */ 1253 hat_prepare_mapping(kas.a_hat, 1254 (caddr_t)(uintptr_t)txp->xt_mop.host_addr, NULL); 1255 1256 return (0); 1257 } 1258 1259 /*ARGSUSED*/ 1260 static void 1261 xnb_txbuf_destructor(void *buf, void *arg) 1262 { 1263 xnb_txbuf_t *txp = buf; 1264 1265 ASSERT(txp->xt_mop.host_addr != NULL); 1266 ASSERT((txp->xt_flags & XNB_TXBUF_INUSE) == 0); 1267 1268 hat_release_mapping(kas.a_hat, 1269 (caddr_t)(uintptr_t)txp->xt_mop.host_addr); 1270 vmem_free(heap_arena, 1271 (caddr_t)(uintptr_t)txp->xt_mop.host_addr, PAGESIZE); 1272 } 1273 1274 static void 1275 xnb_tx_notify_peer(xnb_t *xnbp) 1276 { 1277 boolean_t notify; 1278 1279 ASSERT(MUTEX_HELD(&xnbp->xnb_tx_lock)); 1280 1281 /* LINTED: constant in conditional context */ 1282 RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&xnbp->xnb_tx_ring, notify); 1283 if (notify) { 1284 ec_notify_via_evtchn(xnbp->xnb_evtchn); 1285 xnbp->xnb_stat_tx_notify_sent++; 1286 } else { 1287 xnbp->xnb_stat_tx_notify_deferred++; 1288 } 1289 } 1290 1291 static void 1292 xnb_tx_complete(xnb_txbuf_t *txp) 1293 { 1294 xnb_t *xnbp = txp->xt_xnbp; 1295 1296 ASSERT((txp->xt_flags & XNB_TXBUF_INUSE) == XNB_TXBUF_INUSE); 1297 1298 mutex_enter(&xnbp->xnb_tx_lock); 1299 xnb_tx_schedule_unmop(xnbp, &txp->xt_mop, txp); 1300 mutex_exit(&xnbp->xnb_tx_lock); 1301 } 1302 1303 static void 1304 xnb_tx_mark_complete(xnb_t *xnbp, RING_IDX id, int16_t status) 1305 { 1306 RING_IDX i; 1307 netif_tx_response_t *txresp; 1308 1309 ASSERT(MUTEX_HELD(&xnbp->xnb_tx_lock)); 1310 1311 i = xnbp->xnb_tx_ring.rsp_prod_pvt; 1312 1313 txresp = RING_GET_RESPONSE(&xnbp->xnb_tx_ring, i); 1314 txresp->id = id; 1315 txresp->status = status; 1316 1317 xnbp->xnb_tx_ring.rsp_prod_pvt = i + 1; 1318 1319 /* 1320 * Note that we don't push the change to the peer here - that 1321 * is the callers responsibility. 1322 */ 1323 } 1324 1325 static void 1326 xnb_tx_schedule_unmop(xnb_t *xnbp, gnttab_map_grant_ref_t *mop, 1327 xnb_txbuf_t *txp) 1328 { 1329 gnttab_unmap_grant_ref_t *unmop; 1330 int u_count; 1331 int reqs_on_ring; 1332 1333 ASSERT(MUTEX_HELD(&xnbp->xnb_tx_lock)); 1334 ASSERT(xnbp->xnb_tx_unmop_count < NET_TX_RING_SIZE); 1335 1336 u_count = xnbp->xnb_tx_unmop_count++; 1337 1338 /* Cache data for the time when we actually unmap grant refs */ 1339 xnbp->xnb_tx_unmop_txp[u_count] = txp; 1340 1341 unmop = &xnbp->xnb_tx_unmop[u_count]; 1342 unmop->host_addr = mop->host_addr; 1343 unmop->dev_bus_addr = mop->dev_bus_addr; 1344 unmop->handle = mop->handle; 1345 1346 /* 1347 * We cannot check the ring once we're disconnected from it. Batching 1348 * doesn't seem to be a useful optimisation in this case either, 1349 * so we directly call into the actual unmap function. 1350 */ 1351 if (xnbp->xnb_connected) { 1352 reqs_on_ring = RING_HAS_UNCONSUMED_REQUESTS(&xnbp->xnb_tx_ring); 1353 1354 /* 1355 * By tuning xnb_unmop_hiwat to N, we can emulate "N per batch" 1356 * or (with N == 1) "immediate unmop" behaviour. 1357 * The "> xnb_unmop_lowwat" is a guard against ring exhaustion. 1358 */ 1359 if (xnbp->xnb_tx_unmop_count < xnb_unmop_hiwat && 1360 reqs_on_ring > xnb_unmop_lowwat) 1361 return; 1362 } 1363 1364 xnb_tx_perform_pending_unmop(xnbp); 1365 } 1366 1367 /* 1368 * Here we perform the actual unmapping of the data that was 1369 * accumulated in xnb_tx_schedule_unmop(). 1370 * Note that it is the caller's responsibility to make sure that 1371 * there's actually something there to unmop. 1372 */ 1373 static void 1374 xnb_tx_perform_pending_unmop(xnb_t *xnbp) 1375 { 1376 RING_IDX loop; 1377 #ifdef XNB_DEBUG 1378 gnttab_unmap_grant_ref_t *unmop; 1379 #endif /* XNB_DEBUG */ 1380 1381 ASSERT(MUTEX_HELD(&xnbp->xnb_tx_lock)); 1382 ASSERT(xnbp->xnb_tx_unmop_count > 0); 1383 1384 if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, 1385 xnbp->xnb_tx_unmop, xnbp->xnb_tx_unmop_count) < 0) { 1386 cmn_err(CE_WARN, "xnb_tx_perform_pending_unmop: " 1387 "unmap grant operation failed, " 1388 "%d pages lost", xnbp->xnb_tx_unmop_count); 1389 } 1390 1391 #ifdef XNB_DEBUG 1392 for (loop = 0, unmop = xnbp->xnb_tx_unmop; 1393 loop < xnbp->xnb_tx_unmop_count; 1394 loop++, unmop++) { 1395 if (unmop->status != 0) { 1396 cmn_err(CE_WARN, "xnb_tx_perform_pending_unmop: " 1397 "unmap grant reference failed (%d)", 1398 unmop->status); 1399 } 1400 } 1401 #endif /* XNB_DEBUG */ 1402 1403 for (loop = 0; loop < xnbp->xnb_tx_unmop_count; loop++) { 1404 xnb_txbuf_t *txp = xnbp->xnb_tx_unmop_txp[loop]; 1405 1406 if (txp == NULL) 1407 cmn_err(CE_PANIC, 1408 "xnb_tx_perform_pending_unmop: " 1409 "unexpected NULL txp (loop %d; count %d)!", 1410 loop, xnbp->xnb_tx_unmop_count); 1411 1412 if (xnbp->xnb_connected) 1413 xnb_tx_mark_complete(xnbp, txp->xt_id, txp->xt_status); 1414 xnb_txbuf_put(xnbp, txp); 1415 } 1416 if (xnbp->xnb_connected) 1417 xnb_tx_notify_peer(xnbp); 1418 1419 xnbp->xnb_tx_unmop_count = 0; 1420 1421 #ifdef XNB_DEBUG 1422 bzero(xnbp->xnb_tx_unmop, sizeof (xnbp->xnb_tx_unmop)); 1423 bzero(xnbp->xnb_tx_unmop_txp, sizeof (xnbp->xnb_tx_unmop_txp)); 1424 #endif /* XNB_DEBUG */ 1425 } 1426 1427 static xnb_txbuf_t * 1428 xnb_txbuf_get(xnb_t *xnbp, int flags) 1429 { 1430 xnb_txbuf_t *txp; 1431 1432 ASSERT(MUTEX_HELD(&xnbp->xnb_tx_lock)); 1433 1434 txp = kmem_cache_alloc(xnb_txbuf_cachep, flags); 1435 if (txp != NULL) { 1436 ASSERT((txp->xt_flags & XNB_TXBUF_INUSE) == 0); 1437 txp->xt_flags |= XNB_TXBUF_INUSE; 1438 1439 txp->xt_xnbp = xnbp; 1440 txp->xt_mop.dom = xnbp->xnb_peer; 1441 1442 txp->xt_mop.flags = GNTMAP_host_map; 1443 if (!xnbp->xnb_tx_pages_writable) 1444 txp->xt_mop.flags |= GNTMAP_readonly; 1445 1446 xnbp->xnb_tx_buf_count++; 1447 } 1448 1449 return (txp); 1450 } 1451 1452 static void 1453 xnb_txbuf_put(xnb_t *xnbp, xnb_txbuf_t *txp) 1454 { 1455 ASSERT(MUTEX_HELD(&xnbp->xnb_tx_lock)); 1456 ASSERT((txp->xt_flags & XNB_TXBUF_INUSE) == XNB_TXBUF_INUSE); 1457 1458 txp->xt_flags &= ~XNB_TXBUF_INUSE; 1459 xnbp->xnb_tx_buf_count--; 1460 1461 kmem_cache_free(xnb_txbuf_cachep, txp); 1462 } 1463 1464 static mblk_t * 1465 xnb_from_peer(xnb_t *xnbp) 1466 { 1467 RING_IDX start, end, loop; 1468 gnttab_map_grant_ref_t *mop; 1469 xnb_txbuf_t **txpp; 1470 netif_tx_request_t *txreq; 1471 boolean_t work_to_do; 1472 mblk_t *head, *tail; 1473 /* 1474 * If the peer granted a read-only mapping to the page then we 1475 * must copy the data, as the local protocol stack (should the 1476 * packet be destined for this host) will modify the packet 1477 * 'in place'. 1478 */ 1479 boolean_t copy = xnbp->xnb_tx_always_copy || 1480 !xnbp->xnb_tx_pages_writable; 1481 1482 /* 1483 * For each individual request, the sequence of actions is: 1484 * 1485 * 1. get the request. 1486 * 2. map the page based on the grant ref. 1487 * 3. allocate an mblk, copy the data to it. 1488 * 4. release the grant. 1489 * 5. update the ring. 1490 * 6. pass the packet upward. 1491 * 7. kick the peer. 1492 * 1493 * In fact, we try to perform the grant operations in batches, 1494 * so there are two loops. 1495 */ 1496 1497 head = tail = NULL; 1498 around: 1499 ASSERT(MUTEX_HELD(&xnbp->xnb_tx_lock)); 1500 1501 /* LINTED: constant in conditional context */ 1502 RING_FINAL_CHECK_FOR_REQUESTS(&xnbp->xnb_tx_ring, work_to_do); 1503 if (!work_to_do) { 1504 finished: 1505 return (head); 1506 } 1507 1508 start = xnbp->xnb_tx_ring.req_cons; 1509 end = xnbp->xnb_tx_ring.sring->req_prod; 1510 1511 if ((end - start) > NET_TX_RING_SIZE) { 1512 /* 1513 * This usually indicates that the frontend driver is 1514 * misbehaving, as it's not possible to have more than 1515 * NET_TX_RING_SIZE ring elements in play at any one 1516 * time. 1517 * 1518 * We reset the ring pointers to the state declared by 1519 * the frontend and try to carry on. 1520 */ 1521 cmn_err(CE_WARN, "xnb_from_peer: domain %d tried to give us %u " 1522 "items in the ring, resetting and trying to recover.", 1523 xnbp->xnb_peer, (end - start)); 1524 1525 /* LINTED: constant in conditional context */ 1526 BACK_RING_ATTACH(&xnbp->xnb_tx_ring, 1527 (netif_tx_sring_t *)xnbp->xnb_tx_ring_addr, PAGESIZE); 1528 1529 goto around; 1530 } 1531 1532 for (loop = start, mop = xnbp->xnb_tx_mop, txpp = xnbp->xnb_tx_bufp; 1533 loop != end; 1534 loop++, mop++, txpp++) { 1535 xnb_txbuf_t *txp; 1536 1537 txp = xnb_txbuf_get(xnbp, KM_NOSLEEP); 1538 if (txp == NULL) 1539 break; 1540 1541 ASSERT(xnbp->xnb_tx_pages_writable || 1542 ((txp->xt_mop.flags & GNTMAP_readonly) 1543 == GNTMAP_readonly)); 1544 1545 txp->xt_mop.ref = 1546 RING_GET_REQUEST(&xnbp->xnb_tx_ring, loop)->gref; 1547 1548 *mop = txp->xt_mop; 1549 *txpp = txp; 1550 } 1551 1552 if ((loop - start) == 0) 1553 goto finished; 1554 1555 end = loop; 1556 1557 if (xen_map_gref(GNTTABOP_map_grant_ref, xnbp->xnb_tx_mop, 1558 end - start, B_FALSE) != 0) { 1559 1560 cmn_err(CE_WARN, "xnb_from_peer: map grant operation failed"); 1561 1562 loop = start; 1563 txpp = xnbp->xnb_tx_bufp; 1564 1565 while (loop != end) { 1566 xnb_txbuf_put(xnbp, *txpp); 1567 1568 loop++; 1569 txpp++; 1570 } 1571 1572 goto finished; 1573 } 1574 1575 for (loop = start, mop = xnbp->xnb_tx_mop, txpp = xnbp->xnb_tx_bufp; 1576 loop != end; 1577 loop++, mop++, txpp++) { 1578 mblk_t *mp = NULL; 1579 int16_t status = NETIF_RSP_OKAY; 1580 xnb_txbuf_t *txp = *txpp; 1581 1582 if (mop->status != 0) { 1583 cmn_err(CE_WARN, "xnb_from_peer: " 1584 "failed to map buffer: %d", 1585 mop->status); 1586 status = NETIF_RSP_ERROR; 1587 } 1588 1589 txreq = RING_GET_REQUEST(&xnbp->xnb_tx_ring, loop); 1590 1591 if (status == NETIF_RSP_OKAY) { 1592 if (copy) { 1593 mp = allocb(txreq->size, BPRI_MED); 1594 if (mp == NULL) { 1595 status = NETIF_RSP_ERROR; 1596 xnbp->xnb_stat_tx_allocb_failed++; 1597 } else { 1598 bcopy((caddr_t)(uintptr_t) 1599 mop->host_addr + txreq->offset, 1600 mp->b_wptr, txreq->size); 1601 mp->b_wptr += txreq->size; 1602 } 1603 } else { 1604 mp = desballoc((uchar_t *)(uintptr_t) 1605 mop->host_addr + txreq->offset, 1606 txreq->size, 0, &txp->xt_free_rtn); 1607 if (mp == NULL) { 1608 status = NETIF_RSP_ERROR; 1609 xnbp->xnb_stat_tx_allocb_failed++; 1610 } else { 1611 txp->xt_id = txreq->id; 1612 txp->xt_status = status; 1613 txp->xt_mop = *mop; 1614 1615 mp->b_wptr += txreq->size; 1616 } 1617 } 1618 1619 /* 1620 * If we have a buffer and there are checksum 1621 * flags, process them appropriately. 1622 */ 1623 if ((mp != NULL) && 1624 ((txreq->flags & 1625 (NETTXF_csum_blank | NETTXF_data_validated)) 1626 != 0)) { 1627 mp = xnbp->xnb_flavour->xf_cksum_from_peer(xnbp, 1628 mp, txreq->flags); 1629 xnbp->xnb_stat_tx_cksum_no_need++; 1630 } 1631 } 1632 1633 if (copy || (mp == NULL)) { 1634 txp->xt_status = status; 1635 txp->xt_id = txreq->id; 1636 xnb_tx_schedule_unmop(xnbp, mop, txp); 1637 } 1638 1639 if (mp != NULL) { 1640 xnbp->xnb_stat_opackets++; 1641 xnbp->xnb_stat_obytes += txreq->size; 1642 1643 mp->b_next = NULL; 1644 if (head == NULL) { 1645 ASSERT(tail == NULL); 1646 head = mp; 1647 } else { 1648 ASSERT(tail != NULL); 1649 tail->b_next = mp; 1650 } 1651 tail = mp; 1652 } 1653 } 1654 1655 xnbp->xnb_tx_ring.req_cons = loop; 1656 1657 goto around; 1658 /* NOTREACHED */ 1659 } 1660 1661 /* 1662 * intr() -- ring interrupt service routine 1663 */ 1664 static uint_t 1665 xnb_intr(caddr_t arg) 1666 { 1667 xnb_t *xnbp = (xnb_t *)arg; 1668 mblk_t *mp; 1669 1670 xnbp->xnb_stat_intr++; 1671 1672 mutex_enter(&xnbp->xnb_tx_lock); 1673 1674 ASSERT(xnbp->xnb_connected); 1675 1676 mp = xnb_from_peer(xnbp); 1677 1678 mutex_exit(&xnbp->xnb_tx_lock); 1679 1680 if (!xnbp->xnb_hotplugged) { 1681 xnbp->xnb_stat_tx_too_early++; 1682 goto fail; 1683 } 1684 if (mp == NULL) { 1685 xnbp->xnb_stat_spurious_intr++; 1686 goto fail; 1687 } 1688 1689 xnbp->xnb_flavour->xf_from_peer(xnbp, mp); 1690 1691 return (DDI_INTR_CLAIMED); 1692 1693 fail: 1694 freemsgchain(mp); 1695 return (DDI_INTR_CLAIMED); 1696 } 1697 1698 static boolean_t 1699 xnb_connect_rings(dev_info_t *dip) 1700 { 1701 xnb_t *xnbp = ddi_get_driver_private(dip); 1702 char *oename; 1703 struct gnttab_map_grant_ref map_op; 1704 evtchn_port_t evtchn; 1705 int i; 1706 1707 /* 1708 * Cannot attempt to connect the rings if already connected. 1709 */ 1710 ASSERT(!xnbp->xnb_connected); 1711 1712 oename = xvdi_get_oename(dip); 1713 1714 if (xenbus_gather(XBT_NULL, oename, 1715 "event-channel", "%u", &evtchn, 1716 "tx-ring-ref", "%lu", &xnbp->xnb_tx_ring_ref, 1717 "rx-ring-ref", "%lu", &xnbp->xnb_rx_ring_ref, 1718 NULL) != 0) { 1719 cmn_err(CE_WARN, "xnb_connect_rings: " 1720 "cannot read other-end details from %s", 1721 oename); 1722 goto fail; 1723 } 1724 1725 if (xenbus_scanf(XBT_NULL, oename, 1726 "feature-tx-writable", "%d", &i) != 0) 1727 i = 0; 1728 if (i != 0) 1729 xnbp->xnb_tx_pages_writable = B_TRUE; 1730 1731 if (xenbus_scanf(XBT_NULL, oename, 1732 "feature-no-csum-offload", "%d", &i) != 0) 1733 i = 0; 1734 if ((i == 1) || !xnbp->xnb_cksum_offload) 1735 xnbp->xnb_cksum_offload = B_FALSE; 1736 1737 /* Check whether our peer knows and requests hypervisor copy */ 1738 if (xenbus_scanf(XBT_NULL, oename, "request-rx-copy", "%d", &i) 1739 != 0) 1740 i = 0; 1741 if (i != 0) 1742 xnbp->xnb_hv_copy = B_TRUE; 1743 1744 /* 1745 * 1. allocate a vaddr for the tx page, one for the rx page. 1746 * 2. call GNTTABOP_map_grant_ref to map the relevant pages 1747 * into the allocated vaddr (one for tx, one for rx). 1748 * 3. call EVTCHNOP_bind_interdomain to have the event channel 1749 * bound to this domain. 1750 * 4. associate the event channel with an interrupt. 1751 * 5. declare ourselves connected. 1752 * 6. enable the interrupt. 1753 */ 1754 1755 /* 1.tx */ 1756 xnbp->xnb_tx_ring_addr = vmem_xalloc(heap_arena, PAGESIZE, PAGESIZE, 1757 0, 0, 0, 0, VM_SLEEP); 1758 ASSERT(xnbp->xnb_tx_ring_addr != NULL); 1759 1760 /* 2.tx */ 1761 map_op.host_addr = (uint64_t)((long)xnbp->xnb_tx_ring_addr); 1762 map_op.flags = GNTMAP_host_map; 1763 map_op.ref = xnbp->xnb_tx_ring_ref; 1764 map_op.dom = xnbp->xnb_peer; 1765 hat_prepare_mapping(kas.a_hat, xnbp->xnb_tx_ring_addr, NULL); 1766 if (xen_map_gref(GNTTABOP_map_grant_ref, &map_op, 1, B_FALSE) != 0 || 1767 map_op.status != 0) { 1768 cmn_err(CE_WARN, "xnb_connect_rings: cannot map tx-ring page."); 1769 goto fail; 1770 } 1771 xnbp->xnb_tx_ring_handle = map_op.handle; 1772 1773 /* LINTED: constant in conditional context */ 1774 BACK_RING_INIT(&xnbp->xnb_tx_ring, 1775 (netif_tx_sring_t *)xnbp->xnb_tx_ring_addr, PAGESIZE); 1776 1777 /* 1.rx */ 1778 xnbp->xnb_rx_ring_addr = vmem_xalloc(heap_arena, PAGESIZE, PAGESIZE, 1779 0, 0, 0, 0, VM_SLEEP); 1780 ASSERT(xnbp->xnb_rx_ring_addr != NULL); 1781 1782 /* 2.rx */ 1783 map_op.host_addr = (uint64_t)((long)xnbp->xnb_rx_ring_addr); 1784 map_op.flags = GNTMAP_host_map; 1785 map_op.ref = xnbp->xnb_rx_ring_ref; 1786 map_op.dom = xnbp->xnb_peer; 1787 hat_prepare_mapping(kas.a_hat, xnbp->xnb_rx_ring_addr, NULL); 1788 if (xen_map_gref(GNTTABOP_map_grant_ref, &map_op, 1, B_FALSE) != 0 || 1789 map_op.status != 0) { 1790 cmn_err(CE_WARN, "xnb_connect_rings: cannot map rx-ring page."); 1791 goto fail; 1792 } 1793 xnbp->xnb_rx_ring_handle = map_op.handle; 1794 1795 /* LINTED: constant in conditional context */ 1796 BACK_RING_INIT(&xnbp->xnb_rx_ring, 1797 (netif_rx_sring_t *)xnbp->xnb_rx_ring_addr, PAGESIZE); 1798 1799 /* 3 */ 1800 if (xvdi_bind_evtchn(dip, evtchn) != DDI_SUCCESS) { 1801 cmn_err(CE_WARN, "xnb_connect_rings: " 1802 "cannot bind event channel %d", xnbp->xnb_evtchn); 1803 xnbp->xnb_evtchn = INVALID_EVTCHN; 1804 goto fail; 1805 } 1806 xnbp->xnb_evtchn = xvdi_get_evtchn(dip); 1807 1808 /* 1809 * It would be good to set the state to XenbusStateConnected 1810 * here as well, but then what if ddi_add_intr() failed? 1811 * Changing the state in the store will be noticed by the peer 1812 * and cannot be "taken back". 1813 */ 1814 mutex_enter(&xnbp->xnb_tx_lock); 1815 mutex_enter(&xnbp->xnb_rx_lock); 1816 1817 /* 5.1 */ 1818 xnbp->xnb_connected = B_TRUE; 1819 1820 mutex_exit(&xnbp->xnb_rx_lock); 1821 mutex_exit(&xnbp->xnb_tx_lock); 1822 1823 /* 4, 6 */ 1824 if (ddi_add_intr(dip, 0, NULL, NULL, xnb_intr, (caddr_t)xnbp) 1825 != DDI_SUCCESS) { 1826 cmn_err(CE_WARN, "xnb_connect_rings: cannot add interrupt"); 1827 goto fail; 1828 } 1829 xnbp->xnb_irq = B_TRUE; 1830 1831 /* 5.2 */ 1832 (void) xvdi_switch_state(dip, XBT_NULL, XenbusStateConnected); 1833 1834 return (B_TRUE); 1835 1836 fail: 1837 mutex_enter(&xnbp->xnb_tx_lock); 1838 mutex_enter(&xnbp->xnb_rx_lock); 1839 1840 xnbp->xnb_connected = B_FALSE; 1841 mutex_exit(&xnbp->xnb_rx_lock); 1842 mutex_exit(&xnbp->xnb_tx_lock); 1843 1844 return (B_FALSE); 1845 } 1846 1847 static void 1848 xnb_disconnect_rings(dev_info_t *dip) 1849 { 1850 xnb_t *xnbp = ddi_get_driver_private(dip); 1851 1852 if (xnbp->xnb_irq) { 1853 ddi_remove_intr(dip, 0, NULL); 1854 xnbp->xnb_irq = B_FALSE; 1855 } 1856 1857 if (xnbp->xnb_tx_unmop_count > 0) 1858 xnb_tx_perform_pending_unmop(xnbp); 1859 1860 if (xnbp->xnb_evtchn != INVALID_EVTCHN) { 1861 xvdi_free_evtchn(dip); 1862 xnbp->xnb_evtchn = INVALID_EVTCHN; 1863 } 1864 1865 if (xnbp->xnb_rx_ring_handle != INVALID_GRANT_HANDLE) { 1866 struct gnttab_unmap_grant_ref unmap_op; 1867 1868 unmap_op.host_addr = (uint64_t)(uintptr_t) 1869 xnbp->xnb_rx_ring_addr; 1870 unmap_op.dev_bus_addr = 0; 1871 unmap_op.handle = xnbp->xnb_rx_ring_handle; 1872 if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, 1873 &unmap_op, 1) != 0) 1874 cmn_err(CE_WARN, "xnb_disconnect_rings: " 1875 "cannot unmap rx-ring page (%d)", 1876 unmap_op.status); 1877 1878 xnbp->xnb_rx_ring_handle = INVALID_GRANT_HANDLE; 1879 } 1880 1881 if (xnbp->xnb_rx_ring_addr != NULL) { 1882 hat_release_mapping(kas.a_hat, xnbp->xnb_rx_ring_addr); 1883 vmem_free(heap_arena, xnbp->xnb_rx_ring_addr, PAGESIZE); 1884 xnbp->xnb_rx_ring_addr = NULL; 1885 } 1886 1887 if (xnbp->xnb_tx_ring_handle != INVALID_GRANT_HANDLE) { 1888 struct gnttab_unmap_grant_ref unmap_op; 1889 1890 unmap_op.host_addr = (uint64_t)(uintptr_t) 1891 xnbp->xnb_tx_ring_addr; 1892 unmap_op.dev_bus_addr = 0; 1893 unmap_op.handle = xnbp->xnb_tx_ring_handle; 1894 if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, 1895 &unmap_op, 1) != 0) 1896 cmn_err(CE_WARN, "xnb_disconnect_rings: " 1897 "cannot unmap tx-ring page (%d)", 1898 unmap_op.status); 1899 1900 xnbp->xnb_tx_ring_handle = INVALID_GRANT_HANDLE; 1901 } 1902 1903 if (xnbp->xnb_tx_ring_addr != NULL) { 1904 hat_release_mapping(kas.a_hat, xnbp->xnb_tx_ring_addr); 1905 vmem_free(heap_arena, xnbp->xnb_tx_ring_addr, PAGESIZE); 1906 xnbp->xnb_tx_ring_addr = NULL; 1907 } 1908 } 1909 1910 /*ARGSUSED*/ 1911 static void 1912 xnb_oe_state_change(dev_info_t *dip, ddi_eventcookie_t id, 1913 void *arg, void *impl_data) 1914 { 1915 xnb_t *xnbp = ddi_get_driver_private(dip); 1916 XenbusState new_state = *(XenbusState *)impl_data; 1917 1918 ASSERT(xnbp != NULL); 1919 1920 switch (new_state) { 1921 case XenbusStateConnected: 1922 /* spurious state change */ 1923 if (xnbp->xnb_connected) 1924 return; 1925 1926 if (xnb_connect_rings(dip)) { 1927 xnbp->xnb_flavour->xf_peer_connected(xnbp); 1928 } else { 1929 xnbp->xnb_flavour->xf_peer_disconnected(xnbp); 1930 xnb_disconnect_rings(dip); 1931 (void) xvdi_switch_state(dip, XBT_NULL, 1932 XenbusStateClosed); 1933 (void) xvdi_post_event(dip, XEN_HP_REMOVE); 1934 } 1935 1936 /* 1937 * Now that we've attempted to connect it's reasonable 1938 * to allow an attempt to detach. 1939 */ 1940 xnbp->xnb_detachable = B_TRUE; 1941 1942 break; 1943 1944 case XenbusStateClosing: 1945 (void) xvdi_switch_state(dip, XBT_NULL, XenbusStateClosing); 1946 1947 break; 1948 1949 case XenbusStateClosed: 1950 xnbp->xnb_flavour->xf_peer_disconnected(xnbp); 1951 1952 mutex_enter(&xnbp->xnb_tx_lock); 1953 mutex_enter(&xnbp->xnb_rx_lock); 1954 1955 xnb_disconnect_rings(dip); 1956 xnbp->xnb_connected = B_FALSE; 1957 1958 mutex_exit(&xnbp->xnb_rx_lock); 1959 mutex_exit(&xnbp->xnb_tx_lock); 1960 1961 (void) xvdi_switch_state(dip, XBT_NULL, XenbusStateClosed); 1962 (void) xvdi_post_event(dip, XEN_HP_REMOVE); 1963 /* 1964 * In all likelyhood this is already set (in the above 1965 * case), but if the peer never attempted to connect 1966 * and the domain is destroyed we get here without 1967 * having been through the case above, so we set it to 1968 * be sure. 1969 */ 1970 xnbp->xnb_detachable = B_TRUE; 1971 1972 break; 1973 1974 default: 1975 break; 1976 } 1977 } 1978 1979 /*ARGSUSED*/ 1980 static void 1981 xnb_hp_state_change(dev_info_t *dip, ddi_eventcookie_t id, 1982 void *arg, void *impl_data) 1983 { 1984 xnb_t *xnbp = ddi_get_driver_private(dip); 1985 xendev_hotplug_state_t state = *(xendev_hotplug_state_t *)impl_data; 1986 boolean_t success; 1987 1988 ASSERT(xnbp != NULL); 1989 1990 switch (state) { 1991 case Connected: 1992 1993 /* spurious hotplug event */ 1994 if (xnbp->xnb_hotplugged) 1995 return; 1996 1997 success = xnbp->xnb_flavour->xf_hotplug_connected(xnbp); 1998 1999 mutex_enter(&xnbp->xnb_tx_lock); 2000 mutex_enter(&xnbp->xnb_rx_lock); 2001 2002 xnbp->xnb_hotplugged = success; 2003 2004 mutex_exit(&xnbp->xnb_rx_lock); 2005 mutex_exit(&xnbp->xnb_tx_lock); 2006 break; 2007 2008 default: 2009 break; 2010 } 2011 } 2012 2013 static struct modldrv modldrv = { 2014 &mod_miscops, "xnb", 2015 }; 2016 2017 static struct modlinkage modlinkage = { 2018 MODREV_1, &modldrv, NULL 2019 }; 2020 2021 int 2022 _init(void) 2023 { 2024 int i; 2025 2026 mutex_init(&xnb_alloc_page_lock, NULL, MUTEX_DRIVER, NULL); 2027 2028 xnb_txbuf_cachep = kmem_cache_create("xnb_txbuf_cachep", 2029 sizeof (xnb_txbuf_t), 0, xnb_txbuf_constructor, 2030 xnb_txbuf_destructor, NULL, NULL, NULL, 0); 2031 ASSERT(xnb_txbuf_cachep != NULL); 2032 2033 i = mod_install(&modlinkage); 2034 if (i != DDI_SUCCESS) { 2035 kmem_cache_destroy(xnb_txbuf_cachep); 2036 mutex_destroy(&xnb_alloc_page_lock); 2037 } 2038 return (i); 2039 } 2040 2041 int 2042 _info(struct modinfo *modinfop) 2043 { 2044 return (mod_info(&modlinkage, modinfop)); 2045 } 2046 2047 int 2048 _fini(void) 2049 { 2050 int i; 2051 2052 i = mod_remove(&modlinkage); 2053 if (i == DDI_SUCCESS) { 2054 kmem_cache_destroy(xnb_txbuf_cachep); 2055 mutex_destroy(&xnb_alloc_page_lock); 2056 } 2057 return (i); 2058 } 2059