/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#ifdef DEBUG
#define	XNB_DEBUG 1
#endif /* DEBUG */

#include "xnb.h"

#include <sys/sunddi.h>
#include <sys/sunndi.h>
#include <sys/modctl.h>
#include <sys/conf.h>
#include <sys/mac.h>
#include <sys/dlpi.h>
#include <sys/strsubr.h>
#include <sys/strsun.h>
#include <sys/pattr.h>
#include <vm/seg_kmem.h>
#include <vm/hat_i86.h>
#include <xen/sys/xenbus_impl.h>
#include <xen/sys/xendev.h>
#include <sys/balloon_impl.h>
#include <sys/evtchn_impl.h>
#include <sys/gnttab.h>

#include <sys/gld.h>
#include <inet/ip.h>
#include <inet/ip_impl.h>
#include <sys/vnic_impl.h> /* blech. */

/*
 * The terms "transmit" and "receive" are used in their traditional
 * sense here - packets from other parts of this system are
 * "transmitted" to the peer domain and those originating from the
 * peer are "received".
 *
 * In some cases this can be confusing, because various data
 * structures are shared with the domU driver, which has the opposite
 * view of what constitutes "transmit" and "receive".  In naming the
 * shared structures the domU driver always wins.
 */

/*
 * XXPV dme: things to do, as well as various things indicated
 * throughout the source:
 * - copy avoidance outbound.
 * - copy avoidance inbound.
 * - transfer credit limiting.
 * - MAC address based filtering.
 */

/*
 * Linux expects to have some headroom in received buffers.  The Linux
 * frontend driver (netfront) checks to see if the headroom is
 * available and will re-allocate the buffer to make room if
 * necessary.  To avoid this we add TX_BUFFER_HEADROOM bytes of
 * headroom to each packet we pass to the peer.
 */
#define	TX_BUFFER_HEADROOM	16

static boolean_t	xnb_cksum_offload = B_TRUE;

static boolean_t	xnb_connect_rings(dev_info_t *);
static void		xnb_disconnect_rings(dev_info_t *);
static void		xnb_oe_state_change(dev_info_t *, ddi_eventcookie_t,
    void *, void *);
static void		xnb_hp_state_change(dev_info_t *, ddi_eventcookie_t,
    void *, void *);

static int	xnb_rxbuf_constructor(void *, void *, int);
static void	xnb_rxbuf_destructor(void *, void *);
static xnb_rxbuf_t *xnb_rxbuf_get(xnb_t *, int);
static void	xnb_rxbuf_put(xnb_t *, xnb_rxbuf_t *);
static void	xnb_rx_notify_peer(xnb_t *);
static void	xnb_rx_complete(xnb_rxbuf_t *);
static void	xnb_rx_mark_complete(xnb_t *, RING_IDX, int16_t);
static void	xnb_rx_schedule_unmop(xnb_t *, gnttab_map_grant_ref_t *);
static void	xnb_rx_perform_pending_unmop(xnb_t *);

#ifdef XNB_DEBUG
#define	NR_GRANT_ENTRIES \
	(NR_GRANT_FRAMES * PAGESIZE / sizeof (grant_entry_t))
#endif /* XNB_DEBUG */

/* XXPV dme: are these really invalid? */
#define	INVALID_GRANT_HANDLE	((grant_handle_t)-1)
#define	INVALID_GRANT_REF	((grant_ref_t)-1)

static kmem_cache_t *xnb_rxbuf_cachep;
static kmutex_t	xnb_alloc_page_lock;

/*
 * Statistics.
 */
static char *aux_statistics[] = {
	"tx_cksum_deferred",
	"rx_cksum_no_need",
	"tx_notify_deferred",
	"tx_notify_sent",
	"rx_notify_deferred",
	"rx_notify_sent",
	"tx_too_early",
	"rx_too_early",
	"rx_allocb_failed",
	"mac_full",
	"spurious_intr",
	"allocation_success",
	"allocation_failure",
	"small_allocation_success",
	"small_allocation_failure",
	"csum_hardware",
	"csum_software",
};

static int
xnb_ks_aux_update(kstat_t *ksp, int flag)
{
	xnb_t *xnbp;
	kstat_named_t *knp;

	if (flag != KSTAT_READ)
		return (EACCES);

	xnbp = ksp->ks_private;
	knp = ksp->ks_data;

	/*
	 * Assignment order should match that of the names in
	 * aux_statistics.
	 */
	(knp++)->value.ui64 = xnbp->x_stat_tx_cksum_deferred;
	(knp++)->value.ui64 = xnbp->x_stat_rx_cksum_no_need;
	(knp++)->value.ui64 = xnbp->x_stat_tx_notify_deferred;
	(knp++)->value.ui64 = xnbp->x_stat_tx_notify_sent;
	(knp++)->value.ui64 = xnbp->x_stat_rx_notify_deferred;
	(knp++)->value.ui64 = xnbp->x_stat_rx_notify_sent;
	(knp++)->value.ui64 = xnbp->x_stat_tx_too_early;
	(knp++)->value.ui64 = xnbp->x_stat_rx_too_early;
	(knp++)->value.ui64 = xnbp->x_stat_rx_allocb_failed;
	(knp++)->value.ui64 = xnbp->x_stat_mac_full;
	(knp++)->value.ui64 = xnbp->x_stat_spurious_intr;
	(knp++)->value.ui64 = xnbp->x_stat_allocation_success;
	(knp++)->value.ui64 = xnbp->x_stat_allocation_failure;
	(knp++)->value.ui64 = xnbp->x_stat_small_allocation_success;
	(knp++)->value.ui64 = xnbp->x_stat_small_allocation_failure;
	(knp++)->value.ui64 = xnbp->x_stat_csum_hardware;
	(knp++)->value.ui64 = xnbp->x_stat_csum_software;

	return (0);
}

static boolean_t
xnb_ks_init(xnb_t *xnbp)
{
	int nstat = sizeof (aux_statistics) /
	    sizeof (aux_statistics[0]);
	char **cp = aux_statistics;
	kstat_named_t *knp;

	/*
	 * Create and initialise kstats.
	 */
	xnbp->x_kstat_aux = kstat_create(ddi_driver_name(xnbp->x_devinfo),
	    ddi_get_instance(xnbp->x_devinfo), "aux_statistics", "net",
	    KSTAT_TYPE_NAMED, nstat, 0);
	if (xnbp->x_kstat_aux == NULL)
		return (B_FALSE);

	xnbp->x_kstat_aux->ks_private = xnbp;
	xnbp->x_kstat_aux->ks_update = xnb_ks_aux_update;

	knp = xnbp->x_kstat_aux->ks_data;
	while (nstat > 0) {
		kstat_named_init(knp, *cp, KSTAT_DATA_UINT64);

		knp++;
		cp++;
		nstat--;
	}

	kstat_install(xnbp->x_kstat_aux);

	return (B_TRUE);
}

static void
xnb_ks_free(xnb_t *xnbp)
{
	kstat_delete(xnbp->x_kstat_aux);
}

/*
 * Software checksum calculation and insertion for an arbitrary packet.
 */
/*ARGSUSED*/
static mblk_t *
xnb_software_csum(xnb_t *xnbp, mblk_t *mp)
{
	/*
	 * XXPV dme: shouldn't rely on vnic_fix_cksum(), not least
	 * because it doesn't cover all of the interesting cases :-(
	 */
	(void) hcksum_assoc(mp, NULL, NULL, 0, 0, 0, 0,
	    HCK_FULLCKSUM, KM_NOSLEEP);

	return (vnic_fix_cksum(mp));
}

mblk_t *
xnb_process_cksum_flags(xnb_t *xnbp, mblk_t *mp, uint32_t capab)
{
	struct ether_header *ehp;
	uint16_t sap;
	uint32_t offset;
	ipha_t *ipha;

	ASSERT(mp->b_next == NULL);

	/*
	 * Check that the packet is contained in a single mblk.  In
	 * the "from peer" path this is true today, but will change
	 * when scatter gather support is added.  In the "to peer"
	 * path we cannot be sure, but in most cases it will be true
	 * (in the xnbo case the packet has come from a MAC device
	 * which is unlikely to split packets).
	 */
	if (mp->b_cont != NULL)
		goto software;

	/*
	 * If the MAC has no hardware capability don't do any further
	 * checking.
	 */
	if (capab == 0)
		goto software;

	ASSERT(MBLKL(mp) >= sizeof (struct ether_header));
	ehp = (struct ether_header *)mp->b_rptr;

	if (ntohs(ehp->ether_type) == VLAN_TPID) {
		struct ether_vlan_header *evhp;

		ASSERT(MBLKL(mp) >= sizeof (struct ether_vlan_header));
		evhp = (struct ether_vlan_header *)mp->b_rptr;
		sap = ntohs(evhp->ether_type);
		offset = sizeof (struct ether_vlan_header);
	} else {
		sap = ntohs(ehp->ether_type);
		offset = sizeof (struct ether_header);
	}

	/*
	 * We only attempt to do IPv4 packets in hardware.
	 */
	if (sap != ETHERTYPE_IP)
		goto software;

	/*
	 * We know that this is an IPv4 packet.
	 */
	ipha = (ipha_t *)(mp->b_rptr + offset);

	switch (ipha->ipha_protocol) {
	case IPPROTO_TCP:
	case IPPROTO_UDP:
		/*
		 * This is a TCP/IPv4 or UDP/IPv4 packet.
		 *
		 * If the capabilities indicate that full checksum
		 * offload is available, use it.
		 */
		if ((capab & HCKSUM_INET_FULL_V4) != 0) {
			(void) hcksum_assoc(mp, NULL, NULL,
			    0, 0, 0, 0,
			    HCK_FULLCKSUM, KM_NOSLEEP);

			xnbp->x_stat_csum_hardware++;

			return (mp);
		}

		/*
		 * XXPV dme: If the capabilities indicate that partial
		 * checksum offload is available, we should use it.
		 */

		break;

	default:
		/* Use software. */
		break;
	}

software:
	/*
	 * We are not able to use any offload so do the whole thing in
	 * software.
	 */
	xnbp->x_stat_csum_software++;

	return (xnb_software_csum(xnbp, mp));
}

int
xnb_attach(dev_info_t *dip, xnb_flavour_t *flavour, void *flavour_data)
{
	xnb_t *xnbp;
	char *xsname, mac[ETHERADDRL * 3];

	xnbp = kmem_zalloc(sizeof (*xnbp), KM_SLEEP);

	xnbp->x_flavour = flavour;
	xnbp->x_flavour_data = flavour_data;
	xnbp->x_devinfo = dip;
	xnbp->x_evtchn = INVALID_EVTCHN;
	xnbp->x_irq = B_FALSE;
	xnbp->x_tx_ring_handle = INVALID_GRANT_HANDLE;
	xnbp->x_rx_ring_handle = INVALID_GRANT_HANDLE;
	xnbp->x_cksum_offload = xnb_cksum_offload;
	xnbp->x_connected = B_FALSE;
	xnbp->x_hotplugged = B_FALSE;
	xnbp->x_detachable = B_FALSE;
	xnbp->x_peer = xvdi_get_oeid(dip);
	xnbp->x_rx_pages_writable = B_FALSE;

	xnbp->x_rx_buf_count = 0;
	xnbp->x_rx_unmop_count = 0;

	xnbp->x_tx_va = vmem_alloc(heap_arena, PAGESIZE, VM_SLEEP);
	ASSERT(xnbp->x_tx_va != NULL);

	if (ddi_get_iblock_cookie(dip, 0, &xnbp->x_icookie)
	    != DDI_SUCCESS)
		goto failure;

	mutex_init(&xnbp->x_tx_lock, NULL, MUTEX_DRIVER, xnbp->x_icookie);
	mutex_init(&xnbp->x_rx_lock, NULL, MUTEX_DRIVER, xnbp->x_icookie);

	/* set driver private pointer now */
	ddi_set_driver_private(dip, xnbp);

	if (!xnb_ks_init(xnbp))
		goto late_failure;

	/*
	 * Receive notification of changes in the state of the
	 * driver in the guest domain.
	 */
	if (xvdi_add_event_handler(dip, XS_OE_STATE,
	    xnb_oe_state_change) != DDI_SUCCESS)
		goto very_late_failure;

	/*
	 * Receive notification of hotplug events.
	 */
	if (xvdi_add_event_handler(dip, XS_HP_STATE,
	    xnb_hp_state_change) != DDI_SUCCESS)
		goto very_late_failure;

	xsname = xvdi_get_xsname(dip);

	if (xenbus_printf(XBT_NULL, xsname,
	    "feature-no-csum-offload", "%d",
	    xnbp->x_cksum_offload ? 0 : 1) != 0)
		goto very_very_late_failure;

	if (xenbus_scanf(XBT_NULL, xsname,
	    "mac", "%s", mac) != 0) {
		cmn_err(CE_WARN, "xnb_attach: "
		    "cannot read mac address from %s",
		    xsname);
		goto very_very_late_failure;
	}

	if (ether_aton(mac, xnbp->x_mac_addr) != ETHERADDRL) {
		cmn_err(CE_WARN,
		    "xnb_attach: cannot parse mac address %s",
		    mac);
		goto very_very_late_failure;
	}

	(void) xvdi_switch_state(dip, XBT_NULL, XenbusStateInitWait);
	(void) xvdi_post_event(dip, XEN_HP_ADD);

	return (DDI_SUCCESS);

very_very_late_failure: /* not that the naming is getting silly or anything */
	xvdi_remove_event_handler(dip, NULL);

very_late_failure:
	xnb_ks_free(xnbp);

late_failure:
	mutex_destroy(&xnbp->x_rx_lock);
	mutex_destroy(&xnbp->x_tx_lock);

failure:
	vmem_free(heap_arena, xnbp->x_tx_va, PAGESIZE);
	kmem_free(xnbp, sizeof (*xnbp));
	return (DDI_FAILURE);
}

/*ARGSUSED*/
void
xnb_detach(dev_info_t *dip)
{
	xnb_t *xnbp = ddi_get_driver_private(dip);

	ASSERT(xnbp != NULL);
	ASSERT(!xnbp->x_connected);
	ASSERT(xnbp->x_rx_buf_count == 0);

	xnb_disconnect_rings(dip);

	xvdi_remove_event_handler(dip, NULL);

	xnb_ks_free(xnbp);

	ddi_set_driver_private(dip, NULL);

	mutex_destroy(&xnbp->x_tx_lock);
	mutex_destroy(&xnbp->x_rx_lock);

	ASSERT(xnbp->x_tx_va != NULL);
	vmem_free(heap_arena, xnbp->x_tx_va, PAGESIZE);

	kmem_free(xnbp, sizeof (*xnbp));
}


static mfn_t
xnb_alloc_page(xnb_t *xnbp)
{
#define	WARNING_RATE_LIMIT 100
#define	BATCH_SIZE 256
	static mfn_t mfns[BATCH_SIZE];	/* common across all instances */
	static int nth = BATCH_SIZE;
	mfn_t mfn;

	mutex_enter(&xnb_alloc_page_lock);
	if (nth == BATCH_SIZE) {
		if (balloon_alloc_pages(BATCH_SIZE, mfns) != BATCH_SIZE) {
			xnbp->x_stat_allocation_failure++;
			mutex_exit(&xnb_alloc_page_lock);

			/*
			 * Try for a single page in low memory situations.
			 */
			if (balloon_alloc_pages(1, &mfn) != 1) {
				xnbp->x_stat_small_allocation_failure++;
				if ((xnbp->x_stat_small_allocation_failure
				    % WARNING_RATE_LIMIT) == 0) {
					cmn_err(CE_WARN, "xnb_alloc_page: "
					    "Cannot allocate memory to "
					    "transfer packets to peer.");
				}
				return (0);
			} else {
				xnbp->x_stat_small_allocation_success++;
				return (mfn);
			}
		}

		nth = 0;
		xnbp->x_stat_allocation_success++;
	}

	mfn = mfns[nth++];
	mutex_exit(&xnb_alloc_page_lock);

	ASSERT(mfn != 0);

	return (mfn);
#undef BATCH_SIZE
#undef WARNING_RATE_LIMIT
}

/*ARGSUSED*/
static void
xnb_free_page(xnb_t *xnbp, mfn_t mfn)
{
	int r;

	/*
	 * This happens only in the error path, so batching is
	 * not worth the complication.
	 */
	if ((r = balloon_free_pages(1, &mfn, NULL, NULL)) != 1) {
		cmn_err(CE_WARN, "free_page: cannot decrease memory "
		    "reservation (%d): page kept but unusable (mfn = 0x%lx).",
		    r, mfn);
	}
}

mblk_t *
xnb_to_peer(xnb_t *xnbp, mblk_t *mp)
{
	mblk_t *free = mp, *prev = NULL;
	size_t len;
	gnttab_transfer_t *gop;
	boolean_t notify;
	RING_IDX loop, prod, end;

	/*
	 * For each packet the sequence of operations is:
	 *
	 * 1. get a new page from the hypervisor.
	 * 2. get a request slot from the ring.
	 * 3. copy the data into the new page.
	 * 4. transfer the page to the peer.
	 * 5. update the request slot.
	 * 6. kick the peer.
	 * 7. free mp.
	 *
	 * In order to reduce the number of hypercalls, we prepare
	 * several packets for the peer and perform a single hypercall
	 * to transfer them.
	 */

	mutex_enter(&xnbp->x_tx_lock);

	/*
	 * If we are not connected to the peer or have not yet
	 * finished hotplug it is too early to pass packets to the
	 * peer.
	 */
	if (!(xnbp->x_connected && xnbp->x_hotplugged)) {
		mutex_exit(&xnbp->x_tx_lock);
		xnbp->x_stat_tx_too_early++;
		return (mp);
	}

	loop = xnbp->x_rx_ring.req_cons;
	prod = xnbp->x_rx_ring.rsp_prod_pvt;
	gop = xnbp->x_tx_top;

	/*
	 * Similar to RING_HAS_UNCONSUMED_REQUESTS(&xnbp->x_rx_ring) but
	 * using local variables.
	 */
#define	XNB_RING_HAS_UNCONSUMED_REQUESTS(_r)		\
	((((_r)->sring->req_prod - loop) <		\
	    (RING_SIZE(_r) - (loop - prod))) ?		\
	    ((_r)->sring->req_prod - loop) :		\
	    (RING_SIZE(_r) - (loop - prod)))

	while ((mp != NULL) &&
	    XNB_RING_HAS_UNCONSUMED_REQUESTS(&xnbp->x_rx_ring)) {

		mfn_t mfn;
		pfn_t pfn;
		netif_rx_request_t *rxreq;
		netif_rx_response_t *rxresp;
		char *valoop;
		size_t offset;
		mblk_t *ml;
		uint16_t cksum_flags;

		/* 1 */
		if ((mfn = xnb_alloc_page(xnbp)) == 0) {
			xnbp->x_stat_xmit_defer++;
			break;
		}

		/* 2 */
		rxreq = RING_GET_REQUEST(&xnbp->x_rx_ring, loop);

#ifdef XNB_DEBUG
		if (!(rxreq->id < NET_RX_RING_SIZE))
			cmn_err(CE_PANIC, "xnb_to_peer: "
			    "id %d out of range in request 0x%p",
			    rxreq->id, (void *)rxreq);
		if (rxreq->gref >= NR_GRANT_ENTRIES)
			cmn_err(CE_PANIC, "xnb_to_peer: "
			    "grant ref %d out of range in request 0x%p",
			    rxreq->gref, (void *)rxreq);
#endif /* XNB_DEBUG */

		/* Assign a pfn and map the new page at the allocated va. */
		pfn = xen_assign_pfn(mfn);
		hat_devload(kas.a_hat, xnbp->x_tx_va, PAGESIZE,
		    pfn, PROT_READ | PROT_WRITE, HAT_LOAD);

		offset = TX_BUFFER_HEADROOM;

		/* 3 */
		len = 0;
		valoop = xnbp->x_tx_va + offset;
		for (ml = mp; ml != NULL; ml = ml->b_cont) {
			size_t chunk = ml->b_wptr - ml->b_rptr;

			bcopy(ml->b_rptr, valoop, chunk);
			valoop += chunk;
			len += chunk;
		}

		ASSERT(len + offset < PAGESIZE);

		/* Release the pfn. */
		hat_unload(kas.a_hat, xnbp->x_tx_va, PAGESIZE,
		    HAT_UNLOAD_UNMAP);
		xen_release_pfn(pfn);

		/* 4 */
		gop->mfn = mfn;
		gop->domid = xnbp->x_peer;
		gop->ref = rxreq->gref;

		/* 5.1 */
		rxresp = RING_GET_RESPONSE(&xnbp->x_rx_ring, prod);
		rxresp->offset = offset;
		rxresp->flags = 0;

		cksum_flags = xnbp->x_flavour->xf_cksum_to_peer(xnbp, mp);
		if (cksum_flags != 0)
			xnbp->x_stat_tx_cksum_deferred++;
		rxresp->flags |= cksum_flags;

		rxresp->id = RING_GET_REQUEST(&xnbp->x_rx_ring, prod)->id;
		rxresp->status = len;

		loop++;
		prod++;
		gop++;
		prev = mp;
		mp = mp->b_next;
	}

	/*
	 * Did we actually do anything?
	 */
	if (loop == xnbp->x_rx_ring.req_cons) {
		mutex_exit(&xnbp->x_tx_lock);
		return (mp);
	}

	end = loop;

	/*
	 * Unlink the end of the 'done' list from the remainder.
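	 * Everything up to and including 'prev' has been copied into
	 * transfer pages and is freed below via freemsgchain(free);
	 * whatever remains chained from 'mp' is handed back to the
	 * caller to be retried later.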
	 */
	ASSERT(prev != NULL);
	prev->b_next = NULL;

	if (HYPERVISOR_grant_table_op(GNTTABOP_transfer, xnbp->x_tx_top,
	    loop - xnbp->x_rx_ring.req_cons) != 0) {
		cmn_err(CE_WARN, "xnb_to_peer: transfer operation failed");
	}

	loop = xnbp->x_rx_ring.req_cons;
	prod = xnbp->x_rx_ring.rsp_prod_pvt;
	gop = xnbp->x_tx_top;

	while (loop < end) {
		int16_t status = NETIF_RSP_OKAY;

		if (gop->status != 0) {
			status = NETIF_RSP_ERROR;

			/*
			 * If the status is anything other than
			 * GNTST_bad_page then we don't own the page
			 * any more, so don't try to give it back.
			 */
			if (gop->status != GNTST_bad_page)
				gop->mfn = 0;
		} else {
			/* The page is no longer ours. */
			gop->mfn = 0;
		}

		if (gop->mfn != 0)
			/*
			 * Give back the page, as we won't be using
			 * it.
			 */
			xnb_free_page(xnbp, gop->mfn);
		else
			/*
			 * We gave away a page, update our accounting
			 * now.
			 */
			balloon_drv_subtracted(1);

		/* 5.2 */
		if (status != NETIF_RSP_OKAY) {
			RING_GET_RESPONSE(&xnbp->x_rx_ring, prod)->status =
			    status;
		} else {
			xnbp->x_stat_opackets++;
			xnbp->x_stat_obytes += len;
		}

		loop++;
		prod++;
		gop++;
	}

	xnbp->x_rx_ring.req_cons = loop;
	xnbp->x_rx_ring.rsp_prod_pvt = prod;

	/* 6 */
	/*LINTED: constant in conditional context*/
	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&xnbp->x_rx_ring, notify);
	if (notify) {
		ec_notify_via_evtchn(xnbp->x_evtchn);
		xnbp->x_stat_tx_notify_sent++;
	} else {
		xnbp->x_stat_tx_notify_deferred++;
	}

	if (mp != NULL)
		xnbp->x_stat_xmit_defer++;

	mutex_exit(&xnbp->x_tx_lock);

	/* Free mblk_t's that we consumed. */
	freemsgchain(free);

	return (mp);
}

/*ARGSUSED*/
static int
xnb_rxbuf_constructor(void *buf, void *arg, int kmflag)
{
	xnb_rxbuf_t *rxp = buf;

	bzero(rxp, sizeof (*rxp));

	rxp->xr_free_rtn.free_func = xnb_rx_complete;
	rxp->xr_free_rtn.free_arg = (caddr_t)rxp;

	rxp->xr_mop.host_addr =
	    (uint64_t)(uintptr_t)vmem_alloc(heap_arena, PAGESIZE,
	    ((kmflag & KM_NOSLEEP) == KM_NOSLEEP) ?
	    VM_NOSLEEP : VM_SLEEP);

	if (rxp->xr_mop.host_addr == NULL) {
		cmn_err(CE_WARN, "xnb_rxbuf_constructor: "
		    "cannot get address space");
		return (-1);
	}

	/*
	 * Have the hat ensure that page table exists for the VA.
	 */
	hat_prepare_mapping(kas.a_hat,
	    (caddr_t)(uintptr_t)rxp->xr_mop.host_addr);

	return (0);
}

/*ARGSUSED*/
static void
xnb_rxbuf_destructor(void *buf, void *arg)
{
	xnb_rxbuf_t *rxp = buf;

	ASSERT(rxp->xr_mop.host_addr != NULL);
	ASSERT((rxp->xr_flags & XNB_RXBUF_INUSE) == 0);

	hat_release_mapping(kas.a_hat,
	    (caddr_t)(uintptr_t)rxp->xr_mop.host_addr);
	vmem_free(heap_arena,
	    (caddr_t)(uintptr_t)rxp->xr_mop.host_addr, PAGESIZE);
}

static void
xnb_rx_notify_peer(xnb_t *xnbp)
{
	boolean_t notify;

	ASSERT(MUTEX_HELD(&xnbp->x_rx_lock));

	/*LINTED: constant in conditional context*/
	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&xnbp->x_tx_ring, notify);
	if (notify) {
		ec_notify_via_evtchn(xnbp->x_evtchn);
		xnbp->x_stat_rx_notify_sent++;
	} else {
		xnbp->x_stat_rx_notify_deferred++;
	}
}

static void
xnb_rx_complete(xnb_rxbuf_t *rxp)
{
	xnb_t *xnbp = rxp->xr_xnbp;

	ASSERT((rxp->xr_flags & XNB_RXBUF_INUSE) == XNB_RXBUF_INUSE);

	mutex_enter(&xnbp->x_rx_lock);

	xnb_rx_schedule_unmop(xnbp, &rxp->xr_mop);
	xnb_rx_perform_pending_unmop(xnbp);

	if (xnbp->x_connected) {
		xnb_rx_mark_complete(xnbp, rxp->xr_id, rxp->xr_status);
		xnb_rx_notify_peer(xnbp);
	}

	xnb_rxbuf_put(xnbp, rxp);

	mutex_exit(&xnbp->x_rx_lock);
}

static void
xnb_rx_mark_complete(xnb_t *xnbp, RING_IDX id, int16_t status)
{
	RING_IDX i;
	netif_tx_response_t *txresp;

	ASSERT(MUTEX_HELD(&xnbp->x_rx_lock));

	i = xnbp->x_tx_ring.rsp_prod_pvt;

	txresp = RING_GET_RESPONSE(&xnbp->x_tx_ring, i);
	txresp->id = id;
	txresp->status = status;

	xnbp->x_tx_ring.rsp_prod_pvt = i + 1;

	/*
	 * Note that we don't push the change to the peer here - that
	 * is the caller's responsibility.
	 */
}

/*
 * XXPV dme: currently pending unmap operations are stored on a
 * per-instance basis.  Should they be per-driver?  The locking would
 * have to change (obviously), but there might be an improvement from
 * batching more together.  Right now they are all 'done' either at
 * the tail of each receive operation (copy case) or on each
 * completion (non-copy case).  Should that be changed to some
 * interval (watermark?) to improve the chance of batching?
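 *
 * (The pending unmop array holds at most NET_TX_RING_SIZE entries,
 * which is why xnb_recv() flushes it on every trip around its loop
 * rather than waiting until it has finished.)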
 */
static void
xnb_rx_schedule_unmop(xnb_t *xnbp, gnttab_map_grant_ref_t *mop)
{
	gnttab_unmap_grant_ref_t *unmop;

	ASSERT(MUTEX_HELD(&xnbp->x_rx_lock));
	ASSERT(xnbp->x_rx_unmop_count <= NET_TX_RING_SIZE);

	unmop = &xnbp->x_rx_unmop[xnbp->x_rx_unmop_count];
	xnbp->x_rx_unmop_count++;

	unmop->host_addr = mop->host_addr;
	unmop->dev_bus_addr = mop->dev_bus_addr;
	unmop->handle = mop->handle;

#ifdef XNB_DEBUG
	if (xnbp->x_rx_unmop_count <= NET_TX_RING_SIZE)
		ASSERT(xnbp->x_rx_unmop[xnbp->x_rx_unmop_count].host_addr
		    == NULL);
#endif /* XNB_DEBUG */

}

static void
xnb_rx_perform_pending_unmop(xnb_t *xnbp)
{
#ifdef XNB_DEBUG
	RING_IDX loop;
	gnttab_unmap_grant_ref_t *unmop;
#endif /* XNB_DEBUG */

	ASSERT(MUTEX_HELD(&xnbp->x_rx_lock));

	if (xnbp->x_rx_unmop_count == 0)
		return;

	if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
	    xnbp->x_rx_unmop, xnbp->x_rx_unmop_count) < 0) {
		cmn_err(CE_WARN, "xnb_rx_perform_pending_unmop: "
		    "unmap grant operation failed, "
		    "%d pages lost", xnbp->x_rx_unmop_count);
	}

#ifdef XNB_DEBUG
	for (loop = 0, unmop = xnbp->x_rx_unmop;
	    loop < xnbp->x_rx_unmop_count;
	    loop++, unmop++) {
		if (unmop->status != 0) {
			cmn_err(CE_WARN, "xnb_rx_perform_pending_unmop: "
			    "unmap grant reference failed (%d)",
			    unmop->status);
		}
	}
#endif /* XNB_DEBUG */

	xnbp->x_rx_unmop_count = 0;

#ifdef XNB_DEBUG
	bzero(xnbp->x_rx_unmop, sizeof (xnbp->x_rx_unmop));
#endif /* XNB_DEBUG */
}

static xnb_rxbuf_t *
xnb_rxbuf_get(xnb_t *xnbp, int flags)
{
	xnb_rxbuf_t *rxp;

	ASSERT(MUTEX_HELD(&xnbp->x_rx_lock));

	rxp = kmem_cache_alloc(xnb_rxbuf_cachep, flags);
	if (rxp != NULL) {
		ASSERT((rxp->xr_flags & XNB_RXBUF_INUSE) == 0);
		rxp->xr_flags |= XNB_RXBUF_INUSE;

		rxp->xr_xnbp = xnbp;
		rxp->xr_mop.dom = xnbp->x_peer;

		rxp->xr_mop.flags = GNTMAP_host_map;
		if (!xnbp->x_rx_pages_writable)
			rxp->xr_mop.flags |= GNTMAP_readonly;

		xnbp->x_rx_buf_count++;
	}

	return (rxp);
}

static void
xnb_rxbuf_put(xnb_t *xnbp, xnb_rxbuf_t *rxp)
{
	ASSERT(MUTEX_HELD(&xnbp->x_rx_lock));
	ASSERT((rxp->xr_flags & XNB_RXBUF_INUSE) == XNB_RXBUF_INUSE);

	rxp->xr_flags &= ~XNB_RXBUF_INUSE;
	xnbp->x_rx_buf_count--;

	kmem_cache_free(xnb_rxbuf_cachep, rxp);
}

static mblk_t *
xnb_recv(xnb_t *xnbp)
{
	RING_IDX start, end, loop;
	gnttab_map_grant_ref_t *mop;
	xnb_rxbuf_t **rxpp;
	netif_tx_request_t *txreq;
	boolean_t work_to_do;
	mblk_t *head, *tail;
	/*
	 * If the peer granted a read-only mapping to the page then we
	 * must copy the data, as the local protocol stack (should the
	 * packet be destined for this host) will modify the packet
	 * 'in place'.
	 */
	boolean_t copy = !xnbp->x_rx_pages_writable;

	/*
	 * For each individual request, the sequence of actions is:
	 *
	 * 1. get the request.
	 * 2. map the page based on the grant ref.
	 * 3. allocate an mblk, copy the data to it.
	 * 4. release the grant.
	 * 5. update the ring.
	 * 6. pass the packet upward.
	 * 7. kick the peer.
	 *
	 * In fact, we try to perform the grant operations in batches,
	 * so there are two loops.
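	 *
	 * The first loop gathers one map operation per request into
	 * x_rx_mop; a single GNTTABOP_map_grant_ref hypercall then maps
	 * the whole batch, and the second loop builds an mblk for each
	 * page that mapped successfully.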
	 */

	head = tail = NULL;
around:
	ASSERT(MUTEX_HELD(&xnbp->x_rx_lock));

	/*LINTED: constant in conditional context*/
	RING_FINAL_CHECK_FOR_REQUESTS(&xnbp->x_tx_ring, work_to_do);
	if (!work_to_do) {
finished:
		xnb_rx_notify_peer(xnbp);

		return (head);
	}

	start = xnbp->x_tx_ring.req_cons;
	end = xnbp->x_tx_ring.sring->req_prod;

	for (loop = start, mop = xnbp->x_rx_mop, rxpp = xnbp->x_rx_bufp;
	    loop != end;
	    loop++, mop++, rxpp++) {
		xnb_rxbuf_t *rxp;

		rxp = xnb_rxbuf_get(xnbp, KM_NOSLEEP);
		if (rxp == NULL)
			break;

		ASSERT(xnbp->x_rx_pages_writable ||
		    ((rxp->xr_mop.flags & GNTMAP_readonly)
		    == GNTMAP_readonly));

		rxp->xr_mop.ref =
		    RING_GET_REQUEST(&xnbp->x_tx_ring, loop)->gref;

		ASSERT(rxp->xr_mop.ref < NR_GRANT_ENTRIES);

		*mop = rxp->xr_mop;
		*rxpp = rxp;
	}

	if ((loop - start) == 0)
		goto finished;

	end = loop;

	if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref,
	    xnbp->x_rx_mop, end - start) != 0) {

		cmn_err(CE_WARN, "xnb_recv: map grant operation failed");

		loop = start;
		rxpp = xnbp->x_rx_bufp;

		while (loop != end) {
			xnb_rxbuf_put(xnbp, *rxpp);

			loop++;
			rxpp++;
		}

		goto finished;
	}

	for (loop = start, mop = xnbp->x_rx_mop, rxpp = xnbp->x_rx_bufp;
	    loop != end;
	    loop++, mop++, rxpp++) {
		mblk_t *mp = NULL;
		int16_t status = NETIF_RSP_OKAY;
		xnb_rxbuf_t *rxp = *rxpp;

		if (mop->status != 0) {
			cmn_err(CE_WARN, "xnb_recv: "
			    "failed to map buffer: %d",
			    mop->status);
			status = NETIF_RSP_ERROR;
		}

		txreq = RING_GET_REQUEST(&xnbp->x_tx_ring, loop);

		if (status == NETIF_RSP_OKAY) {
			if (copy) {
				mp = allocb(txreq->size, BPRI_MED);
				if (mp == NULL) {
					status = NETIF_RSP_ERROR;
					xnbp->x_stat_rx_allocb_failed++;
				} else {
					bcopy((caddr_t)(uintptr_t)
					    mop->host_addr + txreq->offset,
					    mp->b_wptr, txreq->size);
					mp->b_wptr += txreq->size;
				}
			} else {
				mp = desballoc((unsigned char *)(uintptr_t)
				    mop->host_addr + txreq->offset,
				    txreq->size, 0, &rxp->xr_free_rtn);
				if (mp == NULL) {
					status = NETIF_RSP_ERROR;
					xnbp->x_stat_rx_allocb_failed++;
				} else {
					rxp->xr_id = txreq->id;
					rxp->xr_status = status;
					rxp->xr_mop = *mop;

					mp->b_wptr += txreq->size;
				}
			}

			/*
			 * If we have a buffer and there are checksum
			 * flags, process them appropriately.
			 */
			if ((mp != NULL) &&
			    ((txreq->flags &
			    (NETTXF_csum_blank | NETTXF_data_validated))
			    != 0)) {
				mp = xnbp->x_flavour->xf_cksum_from_peer(xnbp,
				    mp, txreq->flags);
				xnbp->x_stat_rx_cksum_no_need++;
			}
		}

		if (copy || (mp == NULL)) {
			xnb_rx_mark_complete(xnbp, txreq->id, status);
			xnb_rx_schedule_unmop(xnbp, mop);
		}

		if (mp != NULL) {
			xnbp->x_stat_ipackets++;
			xnbp->x_stat_rbytes += txreq->size;

			mp->b_next = NULL;
			if (head == NULL) {
				ASSERT(tail == NULL);
				head = mp;
			} else {
				ASSERT(tail != NULL);
				tail->b_next = mp;
			}
			tail = mp;
		}
	}

	/*
	 * This has to be here rather than in the 'finished' code
	 * because we can only handle NET_TX_RING_SIZE pending unmap
	 * operations, which may be exceeded by multiple trips around
	 * the receive loop during heavy load (one trip around the
	 * loop cannot generate more than NET_TX_RING_SIZE unmap
	 * operations).
	 */
	xnb_rx_perform_pending_unmop(xnbp);
	if (copy) {
		for (loop = start, rxpp = xnbp->x_rx_bufp;
		    loop != end;
		    loop++, rxpp++)
			xnb_rxbuf_put(xnbp, *rxpp);
	}

	xnbp->x_tx_ring.req_cons = loop;

	goto around;
	/* NOTREACHED */
}

/*
 * intr() -- ring interrupt service routine
 */
static uint_t
xnb_intr(caddr_t arg)
{
	xnb_t *xnbp = (xnb_t *)arg;
	mblk_t *mp;

	xnbp->x_stat_intr++;

	mutex_enter(&xnbp->x_rx_lock);

	ASSERT(xnbp->x_connected);

	mp = xnb_recv(xnbp);

	mutex_exit(&xnbp->x_rx_lock);

	if (!xnbp->x_hotplugged) {
		xnbp->x_stat_rx_too_early++;
		goto fail;
	}
	if (mp == NULL) {
		xnbp->x_stat_spurious_intr++;
		goto fail;
	}

	xnbp->x_flavour->xf_recv(xnbp, mp);

	return (DDI_INTR_CLAIMED);

fail:
	freemsgchain(mp);
	return (DDI_INTR_CLAIMED);
}

static boolean_t
xnb_connect_rings(dev_info_t *dip)
{
	xnb_t *xnbp = ddi_get_driver_private(dip);
	char *oename;
	struct gnttab_map_grant_ref map_op;
	evtchn_port_t evtchn;
	int i;

	/*
	 * Cannot attempt to connect the rings if already connected.
	 */
	ASSERT(!xnbp->x_connected);

	oename = xvdi_get_oename(dip);

	if (xenbus_gather(XBT_NULL, oename,
	    "event-channel", "%u", &evtchn,
	    "tx-ring-ref", "%lu", &xnbp->x_tx_ring_ref,
	    "rx-ring-ref", "%lu", &xnbp->x_rx_ring_ref,
	    NULL) != 0) {
		cmn_err(CE_WARN, "xnb_connect_rings: "
		    "cannot read other-end details from %s",
		    oename);
		goto fail;
	}

	if (xenbus_scanf(XBT_NULL, oename,
	    "feature-tx-writable", "%d", &i) != 0)
		i = 0;
	if (i != 0)
		xnbp->x_rx_pages_writable = B_TRUE;

	if (xenbus_scanf(XBT_NULL, oename,
	    "feature-no-csum-offload", "%d", &i) != 0)
		i = 0;
	if ((i == 1) || !xnbp->x_cksum_offload)
		xnbp->x_cksum_offload = B_FALSE;

	/*
	 * 1. allocate a vaddr for the tx page, one for the rx page.
	 * 2. call GNTTABOP_map_grant_ref to map the relevant pages
	 *    into the allocated vaddr (one for tx, one for rx).
	 * 3. call EVTCHNOP_bind_interdomain to have the event channel
	 *    bound to this domain.
	 * 4. associate the event channel with an interrupt.
	 * 5. declare ourselves connected.
	 * 6. enable the interrupt.
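	 *
	 * Steps 1 and 2 are carried out twice, once for the tx ring
	 * and once for the rx ring (the '1.tx', '2.tx', '1.rx' and
	 * '2.rx' markers below).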
	 */

	/* 1.tx */
	xnbp->x_tx_ring_addr = vmem_xalloc(heap_arena, PAGESIZE, PAGESIZE,
	    0, 0, 0, 0, VM_SLEEP);
	ASSERT(xnbp->x_tx_ring_addr != NULL);

	/* 2.tx */
	map_op.host_addr = (uint64_t)((long)xnbp->x_tx_ring_addr);
	map_op.flags = GNTMAP_host_map;
	map_op.ref = xnbp->x_tx_ring_ref;
	map_op.dom = xnbp->x_peer;
	hat_prepare_mapping(kas.a_hat, xnbp->x_tx_ring_addr);
	if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref,
	    &map_op, 1) != 0 || map_op.status != 0) {
		cmn_err(CE_WARN, "xnb_connect_rings: cannot map tx-ring page.");
		goto fail;
	}
	xnbp->x_tx_ring_handle = map_op.handle;

	/*LINTED: constant in conditional context*/
	BACK_RING_INIT(&xnbp->x_tx_ring,
	    (netif_tx_sring_t *)xnbp->x_tx_ring_addr, PAGESIZE);

	/* 1.rx */
	xnbp->x_rx_ring_addr = vmem_xalloc(heap_arena, PAGESIZE, PAGESIZE,
	    0, 0, 0, 0, VM_SLEEP);
	ASSERT(xnbp->x_rx_ring_addr != NULL);

	/* 2.rx */
	map_op.host_addr = (uint64_t)((long)xnbp->x_rx_ring_addr);
	map_op.flags = GNTMAP_host_map;
	map_op.ref = xnbp->x_rx_ring_ref;
	map_op.dom = xnbp->x_peer;
	hat_prepare_mapping(kas.a_hat, xnbp->x_rx_ring_addr);
	if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref,
	    &map_op, 1) != 0 || map_op.status != 0) {
		cmn_err(CE_WARN, "xnb_connect_rings: cannot map rx-ring page.");
		goto fail;
	}
	xnbp->x_rx_ring_handle = map_op.handle;

	/*LINTED: constant in conditional context*/
	BACK_RING_INIT(&xnbp->x_rx_ring,
	    (netif_rx_sring_t *)xnbp->x_rx_ring_addr, PAGESIZE);

	/* 3 */
	if (xvdi_bind_evtchn(dip, evtchn) != DDI_SUCCESS) {
		cmn_err(CE_WARN, "xnb_connect_rings: "
		    "cannot bind event channel %d", xnbp->x_evtchn);
		xnbp->x_evtchn = INVALID_EVTCHN;
		goto fail;
	}
	xnbp->x_evtchn = xvdi_get_evtchn(dip);

	/*
	 * It would be good to set the state to XenbusStateConnected
	 * here as well, but then what if ddi_add_intr() failed?
	 * Changing the state in the store will be noticed by the peer
	 * and cannot be "taken back".
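	 *
	 * This is why step 5 is split below: x_connected (5.1) is set
	 * before the interrupt is added, and the xenstore state is only
	 * switched to Connected (5.2) once ddi_add_intr() has succeeded.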
	 */
	mutex_enter(&xnbp->x_tx_lock);
	mutex_enter(&xnbp->x_rx_lock);

	/* 5.1 */
	xnbp->x_connected = B_TRUE;

	mutex_exit(&xnbp->x_rx_lock);
	mutex_exit(&xnbp->x_tx_lock);

	/* 4, 6 */
	if (ddi_add_intr(dip, 0, NULL, NULL, xnb_intr, (caddr_t)xnbp)
	    != DDI_SUCCESS) {
		cmn_err(CE_WARN, "xnb_connect_rings: cannot add interrupt");
		goto fail;
	}
	xnbp->x_irq = B_TRUE;

	/* 5.2 */
	(void) xvdi_switch_state(dip, XBT_NULL, XenbusStateConnected);

	return (B_TRUE);

fail:
	mutex_enter(&xnbp->x_tx_lock);
	mutex_enter(&xnbp->x_rx_lock);

	xnbp->x_connected = B_FALSE;

	mutex_exit(&xnbp->x_rx_lock);
	mutex_exit(&xnbp->x_tx_lock);

	return (B_FALSE);
}

static void
xnb_disconnect_rings(dev_info_t *dip)
{
	xnb_t *xnbp = ddi_get_driver_private(dip);

	if (xnbp->x_irq) {
		ddi_remove_intr(dip, 0, NULL);
		xnbp->x_irq = B_FALSE;
	}

	if (xnbp->x_evtchn != INVALID_EVTCHN) {
		xvdi_free_evtchn(dip);
		xnbp->x_evtchn = INVALID_EVTCHN;
	}

	if (xnbp->x_rx_ring_handle != INVALID_GRANT_HANDLE) {
		struct gnttab_unmap_grant_ref unmap_op;

		unmap_op.host_addr = (uint64_t)(uintptr_t)xnbp->x_rx_ring_addr;
		unmap_op.dev_bus_addr = 0;
		unmap_op.handle = xnbp->x_rx_ring_handle;
		if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
		    &unmap_op, 1) != 0)
			cmn_err(CE_WARN, "xnb_disconnect_rings: "
			    "cannot unmap rx-ring page (%d)",
			    unmap_op.status);

		xnbp->x_rx_ring_handle = INVALID_GRANT_HANDLE;
	}

	if (xnbp->x_rx_ring_addr != NULL) {
		hat_release_mapping(kas.a_hat, xnbp->x_rx_ring_addr);
		vmem_free(heap_arena, xnbp->x_rx_ring_addr, PAGESIZE);
		xnbp->x_rx_ring_addr = NULL;
	}

	if (xnbp->x_tx_ring_handle != INVALID_GRANT_HANDLE) {
		struct gnttab_unmap_grant_ref unmap_op;

		unmap_op.host_addr = (uint64_t)(uintptr_t)xnbp->x_tx_ring_addr;
		unmap_op.dev_bus_addr = 0;
		unmap_op.handle = xnbp->x_tx_ring_handle;
		if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
		    &unmap_op, 1) != 0)
			cmn_err(CE_WARN, "xnb_disconnect_rings: "
			    "cannot unmap tx-ring page (%d)",
			    unmap_op.status);

		xnbp->x_tx_ring_handle = INVALID_GRANT_HANDLE;
	}

	if (xnbp->x_tx_ring_addr != NULL) {
		hat_release_mapping(kas.a_hat, xnbp->x_tx_ring_addr);
		vmem_free(heap_arena, xnbp->x_tx_ring_addr, PAGESIZE);
		xnbp->x_tx_ring_addr = NULL;
	}
}

/*ARGSUSED*/
static void
xnb_oe_state_change(dev_info_t *dip, ddi_eventcookie_t id,
    void *arg, void *impl_data)
{
	xnb_t *xnbp = ddi_get_driver_private(dip);
	XenbusState new_state = *(XenbusState *)impl_data;

	ASSERT(xnbp != NULL);

	switch (new_state) {
	case XenbusStateConnected:
		if (xnb_connect_rings(dip)) {
			xnbp->x_flavour->xf_peer_connected(xnbp);
		} else {
			xnbp->x_flavour->xf_peer_disconnected(xnbp);
			xnb_disconnect_rings(dip);
			(void) xvdi_switch_state(dip, XBT_NULL,
			    XenbusStateClosed);
			(void) xvdi_post_event(dip, XEN_HP_REMOVE);
		}

		/*
		 * Now that we've attempted to connect it's reasonable
		 * to allow an attempt to detach.
		 */
		xnbp->x_detachable = B_TRUE;

		break;

	case XenbusStateClosing:
		(void) xvdi_switch_state(dip, XBT_NULL, XenbusStateClosing);

		break;

	case XenbusStateClosed:
		xnbp->x_flavour->xf_peer_disconnected(xnbp);

		mutex_enter(&xnbp->x_tx_lock);
		mutex_enter(&xnbp->x_rx_lock);

		xnb_disconnect_rings(dip);
		xnbp->x_connected = B_FALSE;

		mutex_exit(&xnbp->x_rx_lock);
		mutex_exit(&xnbp->x_tx_lock);

		(void) xvdi_switch_state(dip, XBT_NULL, XenbusStateClosed);
		(void) xvdi_post_event(dip, XEN_HP_REMOVE);
		/*
		 * In all likelihood this is already set (in the above
		 * case), but if the peer never attempted to connect
		 * and the domain is destroyed we get here without
		 * having been through the case above, so we set it to
		 * be sure.
		 */
		xnbp->x_detachable = B_TRUE;

		break;

	default:
		break;
	}
}

/*ARGSUSED*/
static void
xnb_hp_state_change(dev_info_t *dip, ddi_eventcookie_t id,
    void *arg, void *impl_data)
{
	xnb_t *xnbp = ddi_get_driver_private(dip);
	xendev_hotplug_state_t state = *(xendev_hotplug_state_t *)impl_data;
	boolean_t success;

	ASSERT(xnbp != NULL);

	switch (state) {
	case Connected:

		success = xnbp->x_flavour->xf_hotplug_connected(xnbp);

		mutex_enter(&xnbp->x_tx_lock);
		mutex_enter(&xnbp->x_rx_lock);

		xnbp->x_hotplugged = success;

		mutex_exit(&xnbp->x_rx_lock);
		mutex_exit(&xnbp->x_tx_lock);
		break;

	default:
		break;
	}
}

static struct modldrv modldrv = {
	&mod_miscops, "xnb module %I%",
};

static struct modlinkage modlinkage = {
	MODREV_1, &modldrv, NULL
};

int
_init(void)
{
	int i;

	mutex_init(&xnb_alloc_page_lock, NULL, MUTEX_DRIVER, NULL);

	xnb_rxbuf_cachep = kmem_cache_create("xnb_rxbuf_cachep",
	    sizeof (xnb_rxbuf_t), 0, xnb_rxbuf_constructor,
	    xnb_rxbuf_destructor, NULL, NULL, NULL, 0);
	ASSERT(xnb_rxbuf_cachep != NULL);

	i = mod_install(&modlinkage);
	if (i != DDI_SUCCESS) {
		kmem_cache_destroy(xnb_rxbuf_cachep);
		mutex_destroy(&xnb_alloc_page_lock);
	}
	return (i);
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}

int
_fini(void)
{
	int i;

	i = mod_remove(&modlinkage);
	if (i == DDI_SUCCESS) {
		kmem_cache_destroy(xnb_rxbuf_cachep);
		mutex_destroy(&xnb_alloc_page_lock);
	}
	return (i);
}