1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2010 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* 28 * 29 * Copyright (c) 2004 Christian Limpach. 30 * All rights reserved. 31 * 32 * Redistribution and use in source and binary forms, with or without 33 * modification, are permitted provided that the following conditions 34 * are met: 35 * 1. Redistributions of source code must retain the above copyright 36 * notice, this list of conditions and the following disclaimer. 37 * 2. Redistributions in binary form must reproduce the above copyright 38 * notice, this list of conditions and the following disclaimer in the 39 * documentation and/or other materials provided with the distribution. 40 * 3. This section intentionally left blank. 41 * 4. The name of the author may not be used to endorse or promote products 42 * derived from this software without specific prior written permission. 43 * 44 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 45 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 46 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 47 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 48 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 49 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 50 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 51 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 52 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 53 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 54 */ 55 /* 56 * Section 3 of the above license was updated in response to bug 6379571. 57 */ 58 59 /* 60 * xnf.c - GLDv3 network driver for domU. 61 */ 62 63 /* 64 * This driver uses four per-instance locks: 65 * 66 * xnf_gref_lock: 67 * 68 * Protects access to the grant reference list stored in 69 * xnf_gref_head. Grant references should be acquired and released 70 * using gref_get() and gref_put() respectively. 71 * 72 * xnf_schedlock: 73 * 74 * Protects: 75 * xnf_need_sched - used to record that a previous transmit attempt 76 * failed (and consequently it will be necessary to call 77 * mac_tx_update() when transmit resources are available). 78 * xnf_pending_multicast - the number of multicast requests that 79 * have been submitted to the backend for which we have not 80 * processed responses. 81 * 82 * xnf_txlock: 83 * 84 * Protects the transmit ring (xnf_tx_ring) and associated 85 * structures (notably xnf_tx_pkt_id and xnf_tx_pkt_id_head). 
 *
 * xnf_rxlock:
 *
 *    Protects the receive ring (xnf_rx_ring) and associated
 *    structures (notably xnf_rx_pkt_info).
 *
 * If driver-global state that affects both the transmit and receive
 * rings is manipulated, both xnf_txlock and xnf_rxlock should be
 * held, in that order.
 *
 * xnf_schedlock is acquired both whilst holding xnf_txlock and
 * without. It should always be acquired after xnf_txlock if both are
 * held.
 *
 * Notes:
 * - atomic_add_64() is used to manipulate counters where we require
 *   accuracy. For counters intended only for observation by humans,
 *   post increment/decrement are used instead.
 */

#include <sys/types.h>
#include <sys/errno.h>
#include <sys/param.h>
#include <sys/sysmacros.h>
#include <sys/systm.h>
#include <sys/stream.h>
#include <sys/strsubr.h>
#include <sys/strsun.h>
#include <sys/conf.h>
#include <sys/ddi.h>
#include <sys/devops.h>
#include <sys/sunddi.h>
#include <sys/sunndi.h>
#include <sys/dlpi.h>
#include <sys/ethernet.h>
#include <sys/strsun.h>
#include <sys/pattr.h>
#include <inet/ip.h>
#include <inet/ip_impl.h>
#include <sys/gld.h>
#include <sys/modctl.h>
#include <sys/mac_provider.h>
#include <sys/mac_ether.h>
#include <sys/bootinfo.h>
#include <sys/mach_mmu.h>
#ifdef XPV_HVM_DRIVER
#include <sys/xpv_support.h>
#include <sys/hypervisor.h>
#else
#include <sys/hypervisor.h>
#include <sys/evtchn_impl.h>
#include <sys/balloon_impl.h>
#endif
#include <xen/public/io/netif.h>
#include <sys/gnttab.h>
#include <xen/sys/xendev.h>
#include <sys/sdt.h>
#include <sys/note.h>
#include <sys/debug.h>

#include <io/xnf.h>

#if defined(DEBUG) || defined(__lint)
#define	XNF_DEBUG
#endif

#ifdef XNF_DEBUG
int xnf_debug = 0;
xnf_t *xnf_debug_instance = NULL;
#endif

/*
 * On a 32 bit PAE system physical and machine addresses are larger
 * than 32 bits. ddi_btop() on such systems takes an unsigned long
 * argument, and so addresses above 4G are truncated before ddi_btop()
 * gets to see them. To avoid this, code the shift operation here.
 */
#define	xnf_btop(addr)	((addr) >> PAGESHIFT)

unsigned int xnf_max_tx_frags = 1;

/*
 * Should we use the multicast control feature if the backend provides
 * it?
 */
boolean_t xnf_multicast_control = B_TRUE;

/*
 * Received packets below this size are copied to a new streams buffer
 * rather than being desballoc'ed.
 *
 * This value is chosen to accommodate traffic where there are a large
 * number of small packets. For data showing a typical distribution,
 * see:
 *
 * Sinha07a:
 *	Rishi Sinha, Christos Papadopoulos, and John
 *	Heidemann. Internet Packet Size Distributions: Some
 *	Observations. Technical Report ISI-TR-2007-643,
 *	USC/Information Sciences Institute, May, 2007. Originally
 *	released October 2005 as web page
 *	http://netweb.usc.edu/~sinha/pkt-sizes/.
 *	<http://www.isi.edu/~johnh/PAPERS/Sinha07a.html>.
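 *
 * As a hedged illustration of how this threshold is applied by
 * xnf_rx_collect() (the helper below is hypothetical and not part of
 * the driver), the copy-versus-loan decision reduces to a size check:
 */

/*
 * Illustrative sketch only: copy small frames into a fresh mblk,
 * loan larger ones upstream via desballoc(). The real code also
 * requires a replacement xnf_buf_t before it will loan a buffer.
 */
static mblk_t *
xnf_example_rx_wrap_or_copy(caddr_t buf, size_t bufsize, size_t off,
    size_t len, size_t copy_limit, frtn_t *frtnp)
{
	mblk_t *mp;

	if (len > copy_limit) {
		/* Large frame: wrap the existing buffer. */
		mp = desballoc((unsigned char *)buf, bufsize, 0, frtnp);
		if (mp != NULL) {
			mp->b_rptr += off;
			mp->b_wptr = mp->b_rptr + len;
		}
	} else {
		/* Small frame: copy the data out. */
		mp = allocb(len, BPRI_MED);
		if (mp != NULL) {
			bcopy(buf + off, mp->b_wptr, len);
			mp->b_wptr += len;
		}
	}

	return (mp);
}

/*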
189 */ 190 size_t xnf_rx_copy_limit = 64; 191 192 #define INVALID_GRANT_HANDLE ((grant_handle_t)-1) 193 #define INVALID_GRANT_REF ((grant_ref_t)-1) 194 #define INVALID_TX_ID ((uint16_t)-1) 195 196 #define TX_ID_TO_TXID(p, id) (&((p)->xnf_tx_pkt_id[(id)])) 197 #define TX_ID_VALID(i) (((i) != INVALID_TX_ID) && ((i) < NET_TX_RING_SIZE)) 198 199 /* Required system entry points */ 200 static int xnf_attach(dev_info_t *, ddi_attach_cmd_t); 201 static int xnf_detach(dev_info_t *, ddi_detach_cmd_t); 202 203 /* Required driver entry points for Nemo */ 204 static int xnf_start(void *); 205 static void xnf_stop(void *); 206 static int xnf_set_mac_addr(void *, const uint8_t *); 207 static int xnf_set_multicast(void *, boolean_t, const uint8_t *); 208 static int xnf_set_promiscuous(void *, boolean_t); 209 static mblk_t *xnf_send(void *, mblk_t *); 210 static uint_t xnf_intr(caddr_t); 211 static int xnf_stat(void *, uint_t, uint64_t *); 212 static boolean_t xnf_getcapab(void *, mac_capab_t, void *); 213 214 /* Driver private functions */ 215 static int xnf_alloc_dma_resources(xnf_t *); 216 static void xnf_release_dma_resources(xnf_t *); 217 static void xnf_release_mblks(xnf_t *); 218 219 static int xnf_buf_constructor(void *, void *, int); 220 static void xnf_buf_destructor(void *, void *); 221 static xnf_buf_t *xnf_buf_get(xnf_t *, int, boolean_t); 222 #pragma inline(xnf_buf_get) 223 static void xnf_buf_put(xnf_t *, xnf_buf_t *, boolean_t); 224 #pragma inline(xnf_buf_put) 225 static void xnf_buf_refresh(xnf_buf_t *); 226 #pragma inline(xnf_buf_refresh) 227 static void xnf_buf_recycle(xnf_buf_t *); 228 229 static int xnf_tx_buf_constructor(void *, void *, int); 230 static void xnf_tx_buf_destructor(void *, void *); 231 232 static grant_ref_t gref_get(xnf_t *); 233 #pragma inline(gref_get) 234 static void gref_put(xnf_t *, grant_ref_t); 235 #pragma inline(gref_put) 236 237 static xnf_txid_t *txid_get(xnf_t *); 238 #pragma inline(txid_get) 239 static void txid_put(xnf_t *, xnf_txid_t *); 240 #pragma inline(txid_put) 241 242 void xnf_send_driver_status(int, int); 243 static void xnf_rxbuf_hang(xnf_t *, xnf_buf_t *); 244 static int xnf_tx_clean_ring(xnf_t *); 245 static void oe_state_change(dev_info_t *, ddi_eventcookie_t, 246 void *, void *); 247 static boolean_t xnf_kstat_init(xnf_t *); 248 static void xnf_rx_collect(xnf_t *); 249 250 static mac_callbacks_t xnf_callbacks = { 251 MC_GETCAPAB, 252 xnf_stat, 253 xnf_start, 254 xnf_stop, 255 xnf_set_promiscuous, 256 xnf_set_multicast, 257 xnf_set_mac_addr, 258 xnf_send, 259 NULL, 260 NULL, 261 xnf_getcapab 262 }; 263 264 /* DMA attributes for network ring buffer */ 265 static ddi_dma_attr_t ringbuf_dma_attr = { 266 DMA_ATTR_V0, /* version of this structure */ 267 0, /* lowest usable address */ 268 0xffffffffffffffffULL, /* highest usable address */ 269 0x7fffffff, /* maximum DMAable byte count */ 270 MMU_PAGESIZE, /* alignment in bytes */ 271 0x7ff, /* bitmap of burst sizes */ 272 1, /* minimum transfer */ 273 0xffffffffU, /* maximum transfer */ 274 0xffffffffffffffffULL, /* maximum segment length */ 275 1, /* maximum number of segments */ 276 1, /* granularity */ 277 0, /* flags (reserved) */ 278 }; 279 280 /* DMA attributes for transmit and receive data */ 281 static ddi_dma_attr_t buf_dma_attr = { 282 DMA_ATTR_V0, /* version of this structure */ 283 0, /* lowest usable address */ 284 0xffffffffffffffffULL, /* highest usable address */ 285 0x7fffffff, /* maximum DMAable byte count */ 286 MMU_PAGESIZE, /* alignment in bytes */ 287 0x7ff, /* bitmap of burst 
sizes */ 288 1, /* minimum transfer */ 289 0xffffffffU, /* maximum transfer */ 290 0xffffffffffffffffULL, /* maximum segment length */ 291 1, /* maximum number of segments */ 292 1, /* granularity */ 293 0, /* flags (reserved) */ 294 }; 295 296 /* DMA access attributes for registers and descriptors */ 297 static ddi_device_acc_attr_t accattr = { 298 DDI_DEVICE_ATTR_V0, 299 DDI_STRUCTURE_LE_ACC, /* This is a little-endian device */ 300 DDI_STRICTORDER_ACC 301 }; 302 303 /* DMA access attributes for data: NOT to be byte swapped. */ 304 static ddi_device_acc_attr_t data_accattr = { 305 DDI_DEVICE_ATTR_V0, 306 DDI_NEVERSWAP_ACC, 307 DDI_STRICTORDER_ACC 308 }; 309 310 DDI_DEFINE_STREAM_OPS(xnf_dev_ops, nulldev, nulldev, xnf_attach, xnf_detach, 311 nodev, NULL, D_MP, NULL, ddi_quiesce_not_supported); 312 313 static struct modldrv xnf_modldrv = { 314 &mod_driverops, 315 "Virtual Ethernet driver", 316 &xnf_dev_ops 317 }; 318 319 static struct modlinkage modlinkage = { 320 MODREV_1, &xnf_modldrv, NULL 321 }; 322 323 int 324 _init(void) 325 { 326 int r; 327 328 mac_init_ops(&xnf_dev_ops, "xnf"); 329 r = mod_install(&modlinkage); 330 if (r != DDI_SUCCESS) 331 mac_fini_ops(&xnf_dev_ops); 332 333 return (r); 334 } 335 336 int 337 _fini(void) 338 { 339 return (EBUSY); /* XXPV should be removable */ 340 } 341 342 int 343 _info(struct modinfo *modinfop) 344 { 345 return (mod_info(&modlinkage, modinfop)); 346 } 347 348 /* 349 * Acquire a grant reference. 350 */ 351 static grant_ref_t 352 gref_get(xnf_t *xnfp) 353 { 354 grant_ref_t gref; 355 356 mutex_enter(&xnfp->xnf_gref_lock); 357 358 do { 359 gref = gnttab_claim_grant_reference(&xnfp->xnf_gref_head); 360 361 } while ((gref == INVALID_GRANT_REF) && 362 (gnttab_alloc_grant_references(16, &xnfp->xnf_gref_head) == 0)); 363 364 mutex_exit(&xnfp->xnf_gref_lock); 365 366 if (gref == INVALID_GRANT_REF) { 367 xnfp->xnf_stat_gref_failure++; 368 } else { 369 atomic_add_64(&xnfp->xnf_stat_gref_outstanding, 1); 370 if (xnfp->xnf_stat_gref_outstanding > xnfp->xnf_stat_gref_peak) 371 xnfp->xnf_stat_gref_peak = 372 xnfp->xnf_stat_gref_outstanding; 373 } 374 375 return (gref); 376 } 377 378 /* 379 * Release a grant reference. 380 */ 381 static void 382 gref_put(xnf_t *xnfp, grant_ref_t gref) 383 { 384 ASSERT(gref != INVALID_GRANT_REF); 385 386 mutex_enter(&xnfp->xnf_gref_lock); 387 gnttab_release_grant_reference(&xnfp->xnf_gref_head, gref); 388 mutex_exit(&xnfp->xnf_gref_lock); 389 390 atomic_add_64(&xnfp->xnf_stat_gref_outstanding, -1); 391 } 392 393 /* 394 * Acquire a transmit id. 395 */ 396 static xnf_txid_t * 397 txid_get(xnf_t *xnfp) 398 { 399 xnf_txid_t *tidp; 400 401 ASSERT(MUTEX_HELD(&xnfp->xnf_txlock)); 402 403 if (xnfp->xnf_tx_pkt_id_head == INVALID_TX_ID) 404 return (NULL); 405 406 ASSERT(TX_ID_VALID(xnfp->xnf_tx_pkt_id_head)); 407 408 tidp = TX_ID_TO_TXID(xnfp, xnfp->xnf_tx_pkt_id_head); 409 xnfp->xnf_tx_pkt_id_head = tidp->next; 410 tidp->next = INVALID_TX_ID; 411 412 ASSERT(tidp->txbuf == NULL); 413 414 return (tidp); 415 } 416 417 /* 418 * Release a transmit id. 419 */ 420 static void 421 txid_put(xnf_t *xnfp, xnf_txid_t *tidp) 422 { 423 ASSERT(MUTEX_HELD(&xnfp->xnf_txlock)); 424 ASSERT(TX_ID_VALID(tidp->id)); 425 ASSERT(tidp->next == INVALID_TX_ID); 426 427 tidp->txbuf = NULL; 428 tidp->next = xnfp->xnf_tx_pkt_id_head; 429 xnfp->xnf_tx_pkt_id_head = tidp->id; 430 } 431 432 /* 433 * Get `wanted' slots in the transmit ring, waiting for at least that 434 * number if `wait' is B_TRUE. Force the ring to be cleaned by setting 435 * `wanted' to zero. 
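 *
 * For illustration, the three ways this routine is used elsewhere in
 * the driver are:
 *
 *	tx_slots_get(xnfp, 2, B_TRUE)	- block until at least two
 *					  slots are free;
 *	tx_slots_get(xnfp, 1, B_FALSE)	- poll; may return zero;
 *	tx_slots_get(xnfp, 0, B_FALSE)	- force xnf_tx_clean_ring() to run.
 *
 * In all cases the caller must hold xnf_txlock.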
 *
 * Return the number of slots available.
 */
static int
tx_slots_get(xnf_t *xnfp, int wanted, boolean_t wait)
{
	int slotsfree;
	boolean_t forced_clean = (wanted == 0);

	ASSERT(MUTEX_HELD(&xnfp->xnf_txlock));

	/* LINTED: constant in conditional context */
	while (B_TRUE) {
		slotsfree = RING_FREE_REQUESTS(&xnfp->xnf_tx_ring);

		if ((slotsfree < wanted) || forced_clean)
			slotsfree = xnf_tx_clean_ring(xnfp);

		/*
		 * If there are more than we need free, tell other
		 * people to come looking again. We hold txlock, so we
		 * are able to take our slots before anyone else runs.
		 */
		if (slotsfree > wanted)
			cv_broadcast(&xnfp->xnf_cv_tx_slots);

		if (slotsfree >= wanted)
			break;

		if (!wait)
			break;

		cv_wait(&xnfp->xnf_cv_tx_slots, &xnfp->xnf_txlock);
	}

	ASSERT(slotsfree <= RING_SIZE(&(xnfp->xnf_tx_ring)));

	return (slotsfree);
}

static int
xnf_setup_rings(xnf_t *xnfp)
{
	domid_t oeid;
	struct xenbus_device *xsd;
	RING_IDX i;
	int err;
	xnf_txid_t *tidp;
	xnf_buf_t **bdescp;

	oeid = xvdi_get_oeid(xnfp->xnf_devinfo);
	xsd = xvdi_get_xsd(xnfp->xnf_devinfo);

	if (xnfp->xnf_tx_ring_ref != INVALID_GRANT_REF)
		gnttab_end_foreign_access(xnfp->xnf_tx_ring_ref, 0, 0);

	err = gnttab_grant_foreign_access(oeid,
	    xnf_btop(pa_to_ma(xnfp->xnf_tx_ring_phys_addr)), 0);
	if (err <= 0) {
		err = -err;
		xenbus_dev_error(xsd, err, "granting access to tx ring page");
		goto out;
	}
	xnfp->xnf_tx_ring_ref = (grant_ref_t)err;

	if (xnfp->xnf_rx_ring_ref != INVALID_GRANT_REF)
		gnttab_end_foreign_access(xnfp->xnf_rx_ring_ref, 0, 0);

	err = gnttab_grant_foreign_access(oeid,
	    xnf_btop(pa_to_ma(xnfp->xnf_rx_ring_phys_addr)), 0);
	if (err <= 0) {
		err = -err;
		xenbus_dev_error(xsd, err, "granting access to rx ring page");
		goto out;
	}
	xnfp->xnf_rx_ring_ref = (grant_ref_t)err;

	mutex_enter(&xnfp->xnf_txlock);

	/*
	 * Setup/cleanup the TX ring. Note that this can lose packets
	 * after a resume, but we expect to stagger on.
	 */
	xnfp->xnf_tx_pkt_id_head = INVALID_TX_ID; /* I.e. empty list. */
	for (i = 0, tidp = &xnfp->xnf_tx_pkt_id[0];
	    i < NET_TX_RING_SIZE;
	    i++, tidp++) {
		xnf_txbuf_t *txp;

		tidp->id = i;

		txp = tidp->txbuf;
		if (txp == NULL) {
			tidp->next = INVALID_TX_ID; /* Appease txid_put(). */
			txid_put(xnfp, tidp);
			continue;
		}

		ASSERT(txp->tx_txreq.gref != INVALID_GRANT_REF);
		ASSERT(txp->tx_mp != NULL);

		switch (txp->tx_type) {
		case TX_DATA:
			VERIFY(gnttab_query_foreign_access(txp->tx_txreq.gref)
			    == 0);

			if (txp->tx_bdesc == NULL) {
				(void) gnttab_end_foreign_access_ref(
				    txp->tx_txreq.gref, 1);
				gref_put(xnfp, txp->tx_txreq.gref);
				(void) ddi_dma_unbind_handle(
				    txp->tx_dma_handle);
			} else {
				xnf_buf_put(xnfp, txp->tx_bdesc, B_TRUE);
			}

			freemsg(txp->tx_mp);
			txid_put(xnfp, tidp);
			kmem_cache_free(xnfp->xnf_tx_buf_cache, txp);

			break;

		case TX_MCAST_REQ:
			txp->tx_type = TX_MCAST_RSP;
			txp->tx_status = NETIF_RSP_DROPPED;
			cv_broadcast(&xnfp->xnf_cv_multicast);

			/*
			 * The request consumed two slots in the ring,
			 * yet only a single xnf_txid_t is used. Step
			 * over the empty slot.
567 */ 568 i++; 569 ASSERT(i < NET_TX_RING_SIZE); 570 571 break; 572 573 case TX_MCAST_RSP: 574 break; 575 } 576 } 577 578 /* LINTED: constant in conditional context */ 579 SHARED_RING_INIT(xnfp->xnf_tx_ring.sring); 580 /* LINTED: constant in conditional context */ 581 FRONT_RING_INIT(&xnfp->xnf_tx_ring, 582 xnfp->xnf_tx_ring.sring, PAGESIZE); 583 584 mutex_exit(&xnfp->xnf_txlock); 585 586 mutex_enter(&xnfp->xnf_rxlock); 587 588 /* 589 * Clean out any buffers currently posted to the receive ring 590 * before we reset it. 591 */ 592 for (i = 0, bdescp = &xnfp->xnf_rx_pkt_info[0]; 593 i < NET_RX_RING_SIZE; 594 i++, bdescp++) { 595 if (*bdescp != NULL) { 596 xnf_buf_put(xnfp, *bdescp, B_FALSE); 597 *bdescp = NULL; 598 } 599 } 600 601 /* LINTED: constant in conditional context */ 602 SHARED_RING_INIT(xnfp->xnf_rx_ring.sring); 603 /* LINTED: constant in conditional context */ 604 FRONT_RING_INIT(&xnfp->xnf_rx_ring, 605 xnfp->xnf_rx_ring.sring, PAGESIZE); 606 607 /* 608 * Fill the ring with buffers. 609 */ 610 for (i = 0; i < NET_RX_RING_SIZE; i++) { 611 xnf_buf_t *bdesc; 612 613 bdesc = xnf_buf_get(xnfp, KM_SLEEP, B_FALSE); 614 VERIFY(bdesc != NULL); 615 xnf_rxbuf_hang(xnfp, bdesc); 616 } 617 618 /* LINTED: constant in conditional context */ 619 RING_PUSH_REQUESTS(&xnfp->xnf_rx_ring); 620 621 mutex_exit(&xnfp->xnf_rxlock); 622 623 return (0); 624 625 out: 626 if (xnfp->xnf_tx_ring_ref != INVALID_GRANT_REF) 627 gnttab_end_foreign_access(xnfp->xnf_tx_ring_ref, 0, 0); 628 xnfp->xnf_tx_ring_ref = INVALID_GRANT_REF; 629 630 if (xnfp->xnf_rx_ring_ref != INVALID_GRANT_REF) 631 gnttab_end_foreign_access(xnfp->xnf_rx_ring_ref, 0, 0); 632 xnfp->xnf_rx_ring_ref = INVALID_GRANT_REF; 633 634 return (err); 635 } 636 637 /* 638 * Connect driver to back end, called to set up communication with 639 * back end driver both initially and on resume after restore/migrate. 
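 *
 * A hedged sketch of the xenbus transaction pattern used by this
 * function is given below; xnf_example_write_key() is hypothetical
 * and not part of the driver.
 */

/*
 * Illustrative sketch only: start a transaction, write a single key
 * and retry the whole sequence if the commit fails with EAGAIN.
 */
static int
xnf_example_write_key(struct xenbus_device *xsd, char *xsname,
    const char *key, unsigned int val)
{
	xenbus_transaction_t xbt;
	int err;

again:
	err = xenbus_transaction_start(&xbt);
	if (err != 0) {
		xenbus_dev_error(xsd, EIO, "starting transaction");
		return (err);
	}

	err = xenbus_printf(xbt, xsname, key, "%u", val);
	if (err != 0) {
		(void) xenbus_transaction_end(xbt, 1);	/* abort */
		xenbus_dev_error(xsd, err, "writing %s", key);
		return (err);
	}

	err = xenbus_transaction_end(xbt, 0);	/* commit */
	if (err == EAGAIN)
		goto again;
	if (err != 0)
		xenbus_dev_error(xsd, err, "completing transaction");

	return (err);
}

/*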
640 */ 641 void 642 xnf_be_connect(xnf_t *xnfp) 643 { 644 const char *message; 645 xenbus_transaction_t xbt; 646 struct xenbus_device *xsd; 647 char *xsname; 648 int err; 649 650 ASSERT(!xnfp->xnf_connected); 651 652 xsd = xvdi_get_xsd(xnfp->xnf_devinfo); 653 xsname = xvdi_get_xsname(xnfp->xnf_devinfo); 654 655 err = xnf_setup_rings(xnfp); 656 if (err != 0) { 657 cmn_err(CE_WARN, "failed to set up tx/rx rings"); 658 xenbus_dev_error(xsd, err, "setting up ring"); 659 return; 660 } 661 662 again: 663 err = xenbus_transaction_start(&xbt); 664 if (err != 0) { 665 xenbus_dev_error(xsd, EIO, "starting transaction"); 666 return; 667 } 668 669 err = xenbus_printf(xbt, xsname, "tx-ring-ref", "%u", 670 xnfp->xnf_tx_ring_ref); 671 if (err != 0) { 672 message = "writing tx ring-ref"; 673 goto abort_transaction; 674 } 675 676 err = xenbus_printf(xbt, xsname, "rx-ring-ref", "%u", 677 xnfp->xnf_rx_ring_ref); 678 if (err != 0) { 679 message = "writing rx ring-ref"; 680 goto abort_transaction; 681 } 682 683 err = xenbus_printf(xbt, xsname, "event-channel", "%u", 684 xnfp->xnf_evtchn); 685 if (err != 0) { 686 message = "writing event-channel"; 687 goto abort_transaction; 688 } 689 690 err = xenbus_printf(xbt, xsname, "feature-rx-notify", "%d", 1); 691 if (err != 0) { 692 message = "writing feature-rx-notify"; 693 goto abort_transaction; 694 } 695 696 err = xenbus_printf(xbt, xsname, "request-rx-copy", "%d", 1); 697 if (err != 0) { 698 message = "writing request-rx-copy"; 699 goto abort_transaction; 700 } 701 702 if (xnfp->xnf_be_mcast_control) { 703 err = xenbus_printf(xbt, xsname, "request-multicast-control", 704 "%d", 1); 705 if (err != 0) { 706 message = "writing request-multicast-control"; 707 goto abort_transaction; 708 } 709 } 710 711 err = xvdi_switch_state(xnfp->xnf_devinfo, xbt, XenbusStateConnected); 712 if (err != 0) { 713 message = "switching state to XenbusStateConnected"; 714 goto abort_transaction; 715 } 716 717 err = xenbus_transaction_end(xbt, 0); 718 if (err != 0) { 719 if (err == EAGAIN) 720 goto again; 721 xenbus_dev_error(xsd, err, "completing transaction"); 722 } 723 724 return; 725 726 abort_transaction: 727 (void) xenbus_transaction_end(xbt, 1); 728 xenbus_dev_error(xsd, err, "%s", message); 729 } 730 731 /* 732 * Read configuration information from xenstore. 733 */ 734 void 735 xnf_read_config(xnf_t *xnfp) 736 { 737 int err, be_cap; 738 char mac[ETHERADDRL * 3]; 739 char *oename = xvdi_get_oename(xnfp->xnf_devinfo); 740 741 err = xenbus_scanf(XBT_NULL, oename, "mac", 742 "%s", (char *)&mac[0]); 743 if (err != 0) { 744 /* 745 * bad: we're supposed to be set up with a proper mac 746 * addr. at this point 747 */ 748 cmn_err(CE_WARN, "%s%d: no mac address", 749 ddi_driver_name(xnfp->xnf_devinfo), 750 ddi_get_instance(xnfp->xnf_devinfo)); 751 return; 752 } 753 if (ether_aton(mac, xnfp->xnf_mac_addr) != ETHERADDRL) { 754 err = ENOENT; 755 xenbus_dev_error(xvdi_get_xsd(xnfp->xnf_devinfo), ENOENT, 756 "parsing %s/mac", xvdi_get_xsname(xnfp->xnf_devinfo)); 757 return; 758 } 759 760 err = xenbus_scanf(XBT_NULL, oename, 761 "feature-rx-copy", "%d", &be_cap); 762 /* 763 * If we fail to read the store we assume that the key is 764 * absent, implying an older domain at the far end. Older 765 * domains cannot do HV copy. 
766 */ 767 if (err != 0) 768 be_cap = 0; 769 xnfp->xnf_be_rx_copy = (be_cap != 0); 770 771 err = xenbus_scanf(XBT_NULL, oename, 772 "feature-multicast-control", "%d", &be_cap); 773 /* 774 * If we fail to read the store we assume that the key is 775 * absent, implying an older domain at the far end. Older 776 * domains do not support multicast control. 777 */ 778 if (err != 0) 779 be_cap = 0; 780 xnfp->xnf_be_mcast_control = (be_cap != 0) && xnf_multicast_control; 781 } 782 783 /* 784 * attach(9E) -- Attach a device to the system 785 */ 786 static int 787 xnf_attach(dev_info_t *devinfo, ddi_attach_cmd_t cmd) 788 { 789 mac_register_t *macp; 790 xnf_t *xnfp; 791 int err; 792 char cachename[32]; 793 794 #ifdef XNF_DEBUG 795 if (xnf_debug & XNF_DEBUG_DDI) 796 printf("xnf%d: attach(0x%p)\n", ddi_get_instance(devinfo), 797 (void *)devinfo); 798 #endif 799 800 switch (cmd) { 801 case DDI_RESUME: 802 xnfp = ddi_get_driver_private(devinfo); 803 xnfp->xnf_gen++; 804 805 (void) xvdi_resume(devinfo); 806 (void) xvdi_alloc_evtchn(devinfo); 807 xnfp->xnf_evtchn = xvdi_get_evtchn(devinfo); 808 #ifdef XPV_HVM_DRIVER 809 ec_bind_evtchn_to_handler(xnfp->xnf_evtchn, IPL_VIF, xnf_intr, 810 xnfp); 811 #else 812 (void) ddi_add_intr(devinfo, 0, NULL, NULL, xnf_intr, 813 (caddr_t)xnfp); 814 #endif 815 return (DDI_SUCCESS); 816 817 case DDI_ATTACH: 818 break; 819 820 default: 821 return (DDI_FAILURE); 822 } 823 824 /* 825 * Allocate gld_mac_info_t and xnf_instance structures 826 */ 827 macp = mac_alloc(MAC_VERSION); 828 if (macp == NULL) 829 return (DDI_FAILURE); 830 xnfp = kmem_zalloc(sizeof (*xnfp), KM_SLEEP); 831 832 macp->m_dip = devinfo; 833 macp->m_driver = xnfp; 834 xnfp->xnf_devinfo = devinfo; 835 836 macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER; 837 macp->m_src_addr = xnfp->xnf_mac_addr; 838 macp->m_callbacks = &xnf_callbacks; 839 macp->m_min_sdu = 0; 840 macp->m_max_sdu = XNF_MAXPKT; 841 842 xnfp->xnf_running = B_FALSE; 843 xnfp->xnf_connected = B_FALSE; 844 xnfp->xnf_be_rx_copy = B_FALSE; 845 xnfp->xnf_be_mcast_control = B_FALSE; 846 xnfp->xnf_need_sched = B_FALSE; 847 848 xnfp->xnf_rx_head = NULL; 849 xnfp->xnf_rx_tail = NULL; 850 xnfp->xnf_rx_new_buffers_posted = B_FALSE; 851 852 #ifdef XPV_HVM_DRIVER 853 /* 854 * Report our version to dom0. 855 */ 856 if (xenbus_printf(XBT_NULL, "guest/xnf", "version", "%d", 857 HVMPV_XNF_VERS)) 858 cmn_err(CE_WARN, "xnf: couldn't write version\n"); 859 #endif 860 861 /* 862 * Get the iblock cookie with which to initialize the mutexes. 
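 * The same mutexes are acquired from xnf_intr(), so they must be
 * initialized with the interrupt block cookie in order to be created
 * at the appropriate interrupt priority.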
863 */ 864 if (ddi_get_iblock_cookie(devinfo, 0, &xnfp->xnf_icookie) 865 != DDI_SUCCESS) 866 goto failure; 867 868 mutex_init(&xnfp->xnf_txlock, 869 NULL, MUTEX_DRIVER, xnfp->xnf_icookie); 870 mutex_init(&xnfp->xnf_rxlock, 871 NULL, MUTEX_DRIVER, xnfp->xnf_icookie); 872 mutex_init(&xnfp->xnf_schedlock, 873 NULL, MUTEX_DRIVER, xnfp->xnf_icookie); 874 mutex_init(&xnfp->xnf_gref_lock, 875 NULL, MUTEX_DRIVER, xnfp->xnf_icookie); 876 877 cv_init(&xnfp->xnf_cv_state, NULL, CV_DEFAULT, NULL); 878 cv_init(&xnfp->xnf_cv_multicast, NULL, CV_DEFAULT, NULL); 879 cv_init(&xnfp->xnf_cv_tx_slots, NULL, CV_DEFAULT, NULL); 880 881 (void) sprintf(cachename, "xnf_buf_cache_%d", 882 ddi_get_instance(devinfo)); 883 xnfp->xnf_buf_cache = kmem_cache_create(cachename, 884 sizeof (xnf_buf_t), 0, 885 xnf_buf_constructor, xnf_buf_destructor, 886 NULL, xnfp, NULL, 0); 887 if (xnfp->xnf_buf_cache == NULL) 888 goto failure_0; 889 890 (void) sprintf(cachename, "xnf_tx_buf_cache_%d", 891 ddi_get_instance(devinfo)); 892 xnfp->xnf_tx_buf_cache = kmem_cache_create(cachename, 893 sizeof (xnf_txbuf_t), 0, 894 xnf_tx_buf_constructor, xnf_tx_buf_destructor, 895 NULL, xnfp, NULL, 0); 896 if (xnfp->xnf_tx_buf_cache == NULL) 897 goto failure_1; 898 899 xnfp->xnf_gref_head = INVALID_GRANT_REF; 900 901 if (xnf_alloc_dma_resources(xnfp) == DDI_FAILURE) { 902 cmn_err(CE_WARN, "xnf%d: failed to allocate and initialize " 903 "driver data structures", 904 ddi_get_instance(xnfp->xnf_devinfo)); 905 goto failure_2; 906 } 907 908 xnfp->xnf_rx_ring.sring->rsp_event = 909 xnfp->xnf_tx_ring.sring->rsp_event = 1; 910 911 xnfp->xnf_tx_ring_ref = INVALID_GRANT_REF; 912 xnfp->xnf_rx_ring_ref = INVALID_GRANT_REF; 913 914 /* set driver private pointer now */ 915 ddi_set_driver_private(devinfo, xnfp); 916 917 if (!xnf_kstat_init(xnfp)) 918 goto failure_3; 919 920 /* 921 * Allocate an event channel, add the interrupt handler and 922 * bind it to the event channel. 923 */ 924 (void) xvdi_alloc_evtchn(devinfo); 925 xnfp->xnf_evtchn = xvdi_get_evtchn(devinfo); 926 #ifdef XPV_HVM_DRIVER 927 ec_bind_evtchn_to_handler(xnfp->xnf_evtchn, IPL_VIF, xnf_intr, xnfp); 928 #else 929 (void) ddi_add_intr(devinfo, 0, NULL, NULL, xnf_intr, (caddr_t)xnfp); 930 #endif 931 932 err = mac_register(macp, &xnfp->xnf_mh); 933 mac_free(macp); 934 macp = NULL; 935 if (err != 0) 936 goto failure_4; 937 938 if (xvdi_add_event_handler(devinfo, XS_OE_STATE, oe_state_change, NULL) 939 != DDI_SUCCESS) 940 goto failure_5; 941 942 #ifdef XPV_HVM_DRIVER 943 /* 944 * In the HVM case, this driver essentially replaces a driver for 945 * a 'real' PCI NIC. Without the "model" property set to 946 * "Ethernet controller", like the PCI code does, netbooting does 947 * not work correctly, as strplumb_get_netdev_path() will not find 948 * this interface. 
949 */ 950 (void) ndi_prop_update_string(DDI_DEV_T_NONE, devinfo, "model", 951 "Ethernet controller"); 952 #endif 953 954 #ifdef XNF_DEBUG 955 if (xnf_debug_instance == NULL) 956 xnf_debug_instance = xnfp; 957 #endif 958 959 return (DDI_SUCCESS); 960 961 failure_5: 962 (void) mac_unregister(xnfp->xnf_mh); 963 964 failure_4: 965 #ifdef XPV_HVM_DRIVER 966 ec_unbind_evtchn(xnfp->xnf_evtchn); 967 xvdi_free_evtchn(devinfo); 968 #else 969 ddi_remove_intr(devinfo, 0, xnfp->xnf_icookie); 970 #endif 971 xnfp->xnf_evtchn = INVALID_EVTCHN; 972 kstat_delete(xnfp->xnf_kstat_aux); 973 974 failure_3: 975 xnf_release_dma_resources(xnfp); 976 977 failure_2: 978 kmem_cache_destroy(xnfp->xnf_tx_buf_cache); 979 980 failure_1: 981 kmem_cache_destroy(xnfp->xnf_buf_cache); 982 983 failure_0: 984 cv_destroy(&xnfp->xnf_cv_tx_slots); 985 cv_destroy(&xnfp->xnf_cv_multicast); 986 cv_destroy(&xnfp->xnf_cv_state); 987 988 mutex_destroy(&xnfp->xnf_gref_lock); 989 mutex_destroy(&xnfp->xnf_schedlock); 990 mutex_destroy(&xnfp->xnf_rxlock); 991 mutex_destroy(&xnfp->xnf_txlock); 992 993 failure: 994 kmem_free(xnfp, sizeof (*xnfp)); 995 if (macp != NULL) 996 mac_free(macp); 997 998 return (DDI_FAILURE); 999 } 1000 1001 /* detach(9E) -- Detach a device from the system */ 1002 static int 1003 xnf_detach(dev_info_t *devinfo, ddi_detach_cmd_t cmd) 1004 { 1005 xnf_t *xnfp; /* Our private device info */ 1006 1007 #ifdef XNF_DEBUG 1008 if (xnf_debug & XNF_DEBUG_DDI) 1009 printf("xnf_detach(0x%p)\n", (void *)devinfo); 1010 #endif 1011 1012 xnfp = ddi_get_driver_private(devinfo); 1013 1014 switch (cmd) { 1015 case DDI_SUSPEND: 1016 #ifdef XPV_HVM_DRIVER 1017 ec_unbind_evtchn(xnfp->xnf_evtchn); 1018 xvdi_free_evtchn(devinfo); 1019 #else 1020 ddi_remove_intr(devinfo, 0, xnfp->xnf_icookie); 1021 #endif 1022 1023 xvdi_suspend(devinfo); 1024 1025 mutex_enter(&xnfp->xnf_rxlock); 1026 mutex_enter(&xnfp->xnf_txlock); 1027 1028 xnfp->xnf_evtchn = INVALID_EVTCHN; 1029 xnfp->xnf_connected = B_FALSE; 1030 mutex_exit(&xnfp->xnf_txlock); 1031 mutex_exit(&xnfp->xnf_rxlock); 1032 1033 /* claim link to be down after disconnect */ 1034 mac_link_update(xnfp->xnf_mh, LINK_STATE_DOWN); 1035 return (DDI_SUCCESS); 1036 1037 case DDI_DETACH: 1038 break; 1039 1040 default: 1041 return (DDI_FAILURE); 1042 } 1043 1044 if (xnfp->xnf_connected) 1045 return (DDI_FAILURE); 1046 1047 /* 1048 * Cannot detach if we have xnf_buf_t outstanding. 
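 * In particular, receive buffers loaned upstream via desballoc() are
 * only returned through xnf_buf_recycle() when the stack frees them,
 * so their backing pages must remain valid until then.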
1049 */ 1050 if (xnfp->xnf_stat_buf_allocated > 0) 1051 return (DDI_FAILURE); 1052 1053 if (mac_unregister(xnfp->xnf_mh) != 0) 1054 return (DDI_FAILURE); 1055 1056 kstat_delete(xnfp->xnf_kstat_aux); 1057 1058 /* Stop the receiver */ 1059 xnf_stop(xnfp); 1060 1061 xvdi_remove_event_handler(devinfo, XS_OE_STATE); 1062 1063 /* Remove the interrupt */ 1064 #ifdef XPV_HVM_DRIVER 1065 ec_unbind_evtchn(xnfp->xnf_evtchn); 1066 xvdi_free_evtchn(devinfo); 1067 #else 1068 ddi_remove_intr(devinfo, 0, xnfp->xnf_icookie); 1069 #endif 1070 1071 /* Release any pending xmit mblks */ 1072 xnf_release_mblks(xnfp); 1073 1074 /* Release all DMA resources */ 1075 xnf_release_dma_resources(xnfp); 1076 1077 cv_destroy(&xnfp->xnf_cv_tx_slots); 1078 cv_destroy(&xnfp->xnf_cv_multicast); 1079 cv_destroy(&xnfp->xnf_cv_state); 1080 1081 kmem_cache_destroy(xnfp->xnf_tx_buf_cache); 1082 kmem_cache_destroy(xnfp->xnf_buf_cache); 1083 1084 mutex_destroy(&xnfp->xnf_gref_lock); 1085 mutex_destroy(&xnfp->xnf_schedlock); 1086 mutex_destroy(&xnfp->xnf_rxlock); 1087 mutex_destroy(&xnfp->xnf_txlock); 1088 1089 kmem_free(xnfp, sizeof (*xnfp)); 1090 1091 return (DDI_SUCCESS); 1092 } 1093 1094 /* 1095 * xnf_set_mac_addr() -- set the physical network address on the board. 1096 */ 1097 static int 1098 xnf_set_mac_addr(void *arg, const uint8_t *macaddr) 1099 { 1100 _NOTE(ARGUNUSED(arg, macaddr)); 1101 1102 /* 1103 * We can't set our macaddr. 1104 */ 1105 return (ENOTSUP); 1106 } 1107 1108 /* 1109 * xnf_set_multicast() -- set (enable) or disable a multicast address. 1110 * 1111 * Program the hardware to enable/disable the multicast address 1112 * in "mca". Enable if "add" is true, disable if false. 1113 */ 1114 static int 1115 xnf_set_multicast(void *arg, boolean_t add, const uint8_t *mca) 1116 { 1117 xnf_t *xnfp = arg; 1118 xnf_txbuf_t *txp; 1119 int n_slots; 1120 RING_IDX slot; 1121 xnf_txid_t *tidp; 1122 netif_tx_request_t *txrp; 1123 struct netif_extra_info *erp; 1124 boolean_t notify, result; 1125 1126 /* 1127 * If the backend does not support multicast control then we 1128 * must assume that the right packets will just arrive. 1129 */ 1130 if (!xnfp->xnf_be_mcast_control) 1131 return (0); 1132 1133 txp = kmem_cache_alloc(xnfp->xnf_tx_buf_cache, KM_SLEEP); 1134 1135 mutex_enter(&xnfp->xnf_txlock); 1136 1137 /* 1138 * If we're not yet connected then claim success. This is 1139 * acceptable because we refresh the entire set of multicast 1140 * addresses when we get connected. 1141 * 1142 * We can't wait around here because the MAC layer expects 1143 * this to be a non-blocking operation - waiting ends up 1144 * causing a deadlock during resume. 1145 */ 1146 if (!xnfp->xnf_connected) { 1147 mutex_exit(&xnfp->xnf_txlock); 1148 return (0); 1149 } 1150 1151 /* 1152 * 1. Acquire two slots in the ring. 1153 * 2. Fill in the slots. 1154 * 3. Request notification when the operation is done. 1155 * 4. Kick the peer. 1156 * 5. Wait for the response via xnf_tx_clean_ring(). 1157 */ 1158 1159 n_slots = tx_slots_get(xnfp, 2, B_TRUE); 1160 ASSERT(n_slots >= 2); 1161 1162 slot = xnfp->xnf_tx_ring.req_prod_pvt; 1163 tidp = txid_get(xnfp); 1164 VERIFY(tidp != NULL); 1165 1166 txp->tx_type = TX_MCAST_REQ; 1167 txp->tx_slot = slot; 1168 1169 txrp = RING_GET_REQUEST(&xnfp->xnf_tx_ring, slot); 1170 erp = (struct netif_extra_info *) 1171 RING_GET_REQUEST(&xnfp->xnf_tx_ring, slot + 1); 1172 1173 txrp->gref = 0; 1174 txrp->size = 0; 1175 txrp->offset = 0; 1176 /* Set tx_txreq.id to appease xnf_tx_clean_ring(). 
*/ 1177 txrp->id = txp->tx_txreq.id = tidp->id; 1178 txrp->flags = NETTXF_extra_info; 1179 1180 erp->type = add ? XEN_NETIF_EXTRA_TYPE_MCAST_ADD : 1181 XEN_NETIF_EXTRA_TYPE_MCAST_DEL; 1182 bcopy((void *)mca, &erp->u.mcast.addr, ETHERADDRL); 1183 1184 tidp->txbuf = txp; 1185 1186 xnfp->xnf_tx_ring.req_prod_pvt = slot + 2; 1187 1188 mutex_enter(&xnfp->xnf_schedlock); 1189 xnfp->xnf_pending_multicast++; 1190 mutex_exit(&xnfp->xnf_schedlock); 1191 1192 /* LINTED: constant in conditional context */ 1193 RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&xnfp->xnf_tx_ring, 1194 notify); 1195 if (notify) 1196 ec_notify_via_evtchn(xnfp->xnf_evtchn); 1197 1198 while (txp->tx_type == TX_MCAST_REQ) 1199 cv_wait(&xnfp->xnf_cv_multicast, 1200 &xnfp->xnf_txlock); 1201 1202 ASSERT(txp->tx_type == TX_MCAST_RSP); 1203 1204 mutex_enter(&xnfp->xnf_schedlock); 1205 xnfp->xnf_pending_multicast--; 1206 mutex_exit(&xnfp->xnf_schedlock); 1207 1208 result = (txp->tx_status == NETIF_RSP_OKAY); 1209 1210 txid_put(xnfp, tidp); 1211 1212 mutex_exit(&xnfp->xnf_txlock); 1213 1214 kmem_cache_free(xnfp->xnf_tx_buf_cache, txp); 1215 1216 return (result ? 0 : 1); 1217 } 1218 1219 /* 1220 * xnf_set_promiscuous() -- set or reset promiscuous mode on the board 1221 * 1222 * Program the hardware to enable/disable promiscuous mode. 1223 */ 1224 static int 1225 xnf_set_promiscuous(void *arg, boolean_t on) 1226 { 1227 _NOTE(ARGUNUSED(arg, on)); 1228 1229 /* 1230 * We can't really do this, but we pretend that we can in 1231 * order that snoop will work. 1232 */ 1233 return (0); 1234 } 1235 1236 /* 1237 * Clean buffers that we have responses for from the transmit ring. 1238 */ 1239 static int 1240 xnf_tx_clean_ring(xnf_t *xnfp) 1241 { 1242 boolean_t work_to_do; 1243 1244 ASSERT(MUTEX_HELD(&xnfp->xnf_txlock)); 1245 1246 loop: 1247 while (RING_HAS_UNCONSUMED_RESPONSES(&xnfp->xnf_tx_ring)) { 1248 RING_IDX cons, prod, i; 1249 1250 cons = xnfp->xnf_tx_ring.rsp_cons; 1251 prod = xnfp->xnf_tx_ring.sring->rsp_prod; 1252 membar_consumer(); 1253 /* 1254 * Clean tx requests from ring that we have responses 1255 * for. 
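 * The rsp_cons and rsp_prod counters are free-running, unsigned
 * RING_IDX values; RING_GET_RESPONSE() reduces them modulo the ring
 * size, so the loop below simply walks the (prod - cons) responses
 * produced by the backend since we last looked.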
1256 */ 1257 DTRACE_PROBE2(xnf_tx_clean_range, int, cons, int, prod); 1258 for (i = cons; i != prod; i++) { 1259 netif_tx_response_t *trp; 1260 xnf_txid_t *tidp; 1261 xnf_txbuf_t *txp; 1262 1263 trp = RING_GET_RESPONSE(&xnfp->xnf_tx_ring, i); 1264 ASSERT(TX_ID_VALID(trp->id)); 1265 1266 tidp = TX_ID_TO_TXID(xnfp, trp->id); 1267 ASSERT(tidp->id == trp->id); 1268 ASSERT(tidp->next == INVALID_TX_ID); 1269 1270 txp = tidp->txbuf; 1271 ASSERT(txp != NULL); 1272 ASSERT(txp->tx_txreq.id == trp->id); 1273 1274 switch (txp->tx_type) { 1275 case TX_DATA: 1276 if (gnttab_query_foreign_access( 1277 txp->tx_txreq.gref) != 0) 1278 cmn_err(CE_PANIC, 1279 "tx grant %d still in use by " 1280 "backend domain", 1281 txp->tx_txreq.gref); 1282 1283 if (txp->tx_bdesc == NULL) { 1284 (void) gnttab_end_foreign_access_ref( 1285 txp->tx_txreq.gref, 1); 1286 gref_put(xnfp, txp->tx_txreq.gref); 1287 (void) ddi_dma_unbind_handle( 1288 txp->tx_dma_handle); 1289 } else { 1290 xnf_buf_put(xnfp, txp->tx_bdesc, 1291 B_TRUE); 1292 } 1293 1294 freemsg(txp->tx_mp); 1295 txid_put(xnfp, tidp); 1296 kmem_cache_free(xnfp->xnf_tx_buf_cache, txp); 1297 1298 break; 1299 1300 case TX_MCAST_REQ: 1301 txp->tx_type = TX_MCAST_RSP; 1302 txp->tx_status = trp->status; 1303 cv_broadcast(&xnfp->xnf_cv_multicast); 1304 1305 break; 1306 1307 case TX_MCAST_RSP: 1308 break; 1309 1310 default: 1311 cmn_err(CE_PANIC, "xnf_tx_clean_ring: " 1312 "invalid xnf_txbuf_t type: %d", 1313 txp->tx_type); 1314 break; 1315 } 1316 } 1317 /* 1318 * Record the last response we dealt with so that we 1319 * know where to start next time around. 1320 */ 1321 xnfp->xnf_tx_ring.rsp_cons = prod; 1322 membar_enter(); 1323 } 1324 1325 /* LINTED: constant in conditional context */ 1326 RING_FINAL_CHECK_FOR_RESPONSES(&xnfp->xnf_tx_ring, work_to_do); 1327 if (work_to_do) 1328 goto loop; 1329 1330 return (RING_FREE_REQUESTS(&xnfp->xnf_tx_ring)); 1331 } 1332 1333 /* 1334 * Allocate and fill in a look-aside buffer for the packet `mp'. Used 1335 * to ensure that the packet is physically contiguous and contained 1336 * within a single page. 1337 */ 1338 static xnf_buf_t * 1339 xnf_tx_pullup(xnf_t *xnfp, mblk_t *mp) 1340 { 1341 xnf_buf_t *bd; 1342 caddr_t bp; 1343 1344 bd = xnf_buf_get(xnfp, KM_SLEEP, B_TRUE); 1345 if (bd == NULL) 1346 return (NULL); 1347 1348 bp = bd->buf; 1349 while (mp != NULL) { 1350 size_t len = MBLKL(mp); 1351 1352 bcopy(mp->b_rptr, bp, len); 1353 bp += len; 1354 1355 mp = mp->b_cont; 1356 } 1357 1358 ASSERT((bp - bd->buf) <= PAGESIZE); 1359 1360 xnfp->xnf_stat_tx_pullup++; 1361 1362 return (bd); 1363 } 1364 1365 /* 1366 * Insert the pseudo-header checksum into the packet `buf'. 1367 */ 1368 void 1369 xnf_pseudo_cksum(caddr_t buf, int length) 1370 { 1371 struct ether_header *ehp; 1372 uint16_t sap, len, *stuff; 1373 uint32_t cksum; 1374 size_t offset; 1375 ipha_t *ipha; 1376 ipaddr_t src, dst; 1377 1378 ASSERT(length >= sizeof (*ehp)); 1379 ehp = (struct ether_header *)buf; 1380 1381 if (ntohs(ehp->ether_type) == VLAN_TPID) { 1382 struct ether_vlan_header *evhp; 1383 1384 ASSERT(length >= sizeof (*evhp)); 1385 evhp = (struct ether_vlan_header *)buf; 1386 sap = ntohs(evhp->ether_type); 1387 offset = sizeof (*evhp); 1388 } else { 1389 sap = ntohs(ehp->ether_type); 1390 offset = sizeof (*ehp); 1391 } 1392 1393 ASSERT(sap == ETHERTYPE_IP); 1394 1395 /* Packet should have been pulled up by the caller. 
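 *
 * What is stored below is the 16-bit one's complement sum of the
 * pseudo-header only (source and destination addresses, protocol and
 * payload length); the peer completes the checksum over the payload
 * itself. As a purely illustrative example, for src 192.168.1.1,
 * dst 192.168.1.2 and 40 bytes of TCP header plus payload, the terms
 * folded together are 0xc0a8, 0x0101, 0xc0a8, 0x0102, IP_TCP_CSUM_COMP
 * and htons(40).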
*/ 1396 if ((offset + sizeof (ipha_t)) > length) { 1397 cmn_err(CE_WARN, "xnf_pseudo_cksum: no room for checksum"); 1398 return; 1399 } 1400 1401 ipha = (ipha_t *)(buf + offset); 1402 1403 ASSERT(IPH_HDR_LENGTH(ipha) == IP_SIMPLE_HDR_LENGTH); 1404 1405 len = ntohs(ipha->ipha_length) - IP_SIMPLE_HDR_LENGTH; 1406 1407 switch (ipha->ipha_protocol) { 1408 case IPPROTO_TCP: 1409 stuff = IPH_TCPH_CHECKSUMP(ipha, IP_SIMPLE_HDR_LENGTH); 1410 cksum = IP_TCP_CSUM_COMP; 1411 break; 1412 case IPPROTO_UDP: 1413 stuff = IPH_UDPH_CHECKSUMP(ipha, IP_SIMPLE_HDR_LENGTH); 1414 cksum = IP_UDP_CSUM_COMP; 1415 break; 1416 default: 1417 cmn_err(CE_WARN, "xnf_pseudo_cksum: unexpected protocol %d", 1418 ipha->ipha_protocol); 1419 return; 1420 } 1421 1422 src = ipha->ipha_src; 1423 dst = ipha->ipha_dst; 1424 1425 cksum += (dst >> 16) + (dst & 0xFFFF); 1426 cksum += (src >> 16) + (src & 0xFFFF); 1427 cksum += htons(len); 1428 1429 cksum = (cksum >> 16) + (cksum & 0xFFFF); 1430 cksum = (cksum >> 16) + (cksum & 0xFFFF); 1431 1432 ASSERT(cksum <= 0xFFFF); 1433 1434 *stuff = (uint16_t)(cksum ? cksum : ~cksum); 1435 } 1436 1437 /* 1438 * Push a list of prepared packets (`txp') into the transmit ring. 1439 */ 1440 static xnf_txbuf_t * 1441 tx_push_packets(xnf_t *xnfp, xnf_txbuf_t *txp) 1442 { 1443 int slots_free; 1444 RING_IDX slot; 1445 boolean_t notify; 1446 1447 mutex_enter(&xnfp->xnf_txlock); 1448 1449 ASSERT(xnfp->xnf_running); 1450 1451 /* 1452 * Wait until we are connected to the backend. 1453 */ 1454 while (!xnfp->xnf_connected) 1455 cv_wait(&xnfp->xnf_cv_state, &xnfp->xnf_txlock); 1456 1457 slots_free = tx_slots_get(xnfp, 1, B_FALSE); 1458 DTRACE_PROBE1(xnf_send_slotsfree, int, slots_free); 1459 1460 slot = xnfp->xnf_tx_ring.req_prod_pvt; 1461 1462 while ((txp != NULL) && (slots_free > 0)) { 1463 xnf_txid_t *tidp; 1464 netif_tx_request_t *txrp; 1465 1466 tidp = txid_get(xnfp); 1467 VERIFY(tidp != NULL); 1468 1469 txrp = RING_GET_REQUEST(&xnfp->xnf_tx_ring, slot); 1470 1471 txp->tx_slot = slot; 1472 txp->tx_txreq.id = tidp->id; 1473 *txrp = txp->tx_txreq; 1474 1475 tidp->txbuf = txp; 1476 1477 xnfp->xnf_stat_opackets++; 1478 xnfp->xnf_stat_obytes += txp->tx_txreq.size; 1479 1480 txp = txp->tx_next; 1481 slots_free--; 1482 slot++; 1483 1484 } 1485 1486 xnfp->xnf_tx_ring.req_prod_pvt = slot; 1487 1488 /* 1489 * Tell the peer that we sent something, if it cares. 1490 */ 1491 /* LINTED: constant in conditional context */ 1492 RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&xnfp->xnf_tx_ring, 1493 notify); 1494 if (notify) 1495 ec_notify_via_evtchn(xnfp->xnf_evtchn); 1496 1497 mutex_exit(&xnfp->xnf_txlock); 1498 1499 return (txp); 1500 } 1501 1502 /* 1503 * Send the chain of packets `mp'. Called by the MAC framework. 1504 */ 1505 static mblk_t * 1506 xnf_send(void *arg, mblk_t *mp) 1507 { 1508 xnf_t *xnfp = arg; 1509 domid_t oeid; 1510 xnf_txbuf_t *head, *tail; 1511 mblk_t *ml; 1512 int prepared; 1513 1514 oeid = xvdi_get_oeid(xnfp->xnf_devinfo); 1515 1516 /* 1517 * Prepare packets for transmission. 1518 */ 1519 head = tail = NULL; 1520 prepared = 0; 1521 while (mp != NULL) { 1522 xnf_txbuf_t *txp; 1523 int n_chunks, length; 1524 boolean_t page_oops; 1525 uint32_t pflags; 1526 1527 for (ml = mp, n_chunks = length = 0, page_oops = B_FALSE; 1528 ml != NULL; 1529 ml = ml->b_cont, n_chunks++) { 1530 1531 /* 1532 * Test if this buffer includes a page 1533 * boundary. The test assumes that the range 1534 * b_rptr...b_wptr can include only a single 1535 * boundary. 
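 * For example (illustrative numbers): a fragment whose b_rptr sits
 * at byte 0xf80 of a page and whose b_wptr is 0x100 bytes further on
 * ends in the next page, so xnf_btop() differs for the two pointers
 * and the packet is copied into a single-page look-aside buffer by
 * xnf_tx_pullup().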
1536 */ 1537 if (xnf_btop((size_t)ml->b_rptr) != 1538 xnf_btop((size_t)ml->b_wptr)) { 1539 xnfp->xnf_stat_tx_pagebndry++; 1540 page_oops = B_TRUE; 1541 } 1542 1543 length += MBLKL(ml); 1544 } 1545 DTRACE_PROBE1(xnf_send_b_cont, int, n_chunks); 1546 1547 /* 1548 * Make sure packet isn't too large. 1549 */ 1550 if (length > XNF_FRAMESIZE) { 1551 cmn_err(CE_WARN, 1552 "xnf%d: oversized packet (%d bytes) dropped", 1553 ddi_get_instance(xnfp->xnf_devinfo), length); 1554 freemsg(mp); 1555 continue; 1556 } 1557 1558 txp = kmem_cache_alloc(xnfp->xnf_tx_buf_cache, KM_SLEEP); 1559 1560 txp->tx_type = TX_DATA; 1561 1562 if ((n_chunks > xnf_max_tx_frags) || page_oops) { 1563 /* 1564 * Loan a side buffer rather than the mblk 1565 * itself. 1566 */ 1567 txp->tx_bdesc = xnf_tx_pullup(xnfp, mp); 1568 if (txp->tx_bdesc == NULL) { 1569 kmem_cache_free(xnfp->xnf_tx_buf_cache, txp); 1570 break; 1571 } 1572 1573 txp->tx_bufp = txp->tx_bdesc->buf; 1574 txp->tx_mfn = txp->tx_bdesc->buf_mfn; 1575 txp->tx_txreq.gref = txp->tx_bdesc->grant_ref; 1576 1577 } else { 1578 int rc; 1579 ddi_dma_cookie_t dma_cookie; 1580 uint_t ncookies; 1581 1582 rc = ddi_dma_addr_bind_handle(txp->tx_dma_handle, 1583 NULL, (char *)mp->b_rptr, length, 1584 DDI_DMA_WRITE | DDI_DMA_STREAMING, 1585 DDI_DMA_DONTWAIT, 0, &dma_cookie, 1586 &ncookies); 1587 if (rc != DDI_DMA_MAPPED) { 1588 ASSERT(rc != DDI_DMA_INUSE); 1589 ASSERT(rc != DDI_DMA_PARTIAL_MAP); 1590 1591 #ifdef XNF_DEBUG 1592 if (rc != DDI_DMA_NORESOURCES) 1593 cmn_err(CE_WARN, 1594 "xnf%d: bind_handle failed (%x)", 1595 ddi_get_instance(xnfp->xnf_devinfo), 1596 rc); 1597 #endif 1598 kmem_cache_free(xnfp->xnf_tx_buf_cache, txp); 1599 break; 1600 } 1601 ASSERT(ncookies == 1); 1602 1603 txp->tx_bdesc = NULL; 1604 txp->tx_bufp = (caddr_t)mp->b_rptr; 1605 txp->tx_mfn = 1606 xnf_btop(pa_to_ma(dma_cookie.dmac_laddress)); 1607 txp->tx_txreq.gref = gref_get(xnfp); 1608 if (txp->tx_txreq.gref == INVALID_GRANT_REF) { 1609 (void) ddi_dma_unbind_handle( 1610 txp->tx_dma_handle); 1611 kmem_cache_free(xnfp->xnf_tx_buf_cache, txp); 1612 break; 1613 } 1614 gnttab_grant_foreign_access_ref(txp->tx_txreq.gref, 1615 oeid, txp->tx_mfn, 1); 1616 } 1617 1618 txp->tx_next = NULL; 1619 txp->tx_mp = mp; 1620 txp->tx_txreq.size = length; 1621 txp->tx_txreq.offset = (uintptr_t)txp->tx_bufp & PAGEOFFSET; 1622 txp->tx_txreq.flags = 0; 1623 mac_hcksum_get(mp, NULL, NULL, NULL, NULL, &pflags); 1624 if (pflags != 0) { 1625 /* 1626 * If the local protocol stack requests checksum 1627 * offload we set the 'checksum blank' flag, 1628 * indicating to the peer that we need the checksum 1629 * calculated for us. 1630 * 1631 * We _don't_ set the validated flag, because we haven't 1632 * validated that the data and the checksum match. 1633 */ 1634 xnf_pseudo_cksum(txp->tx_bufp, length); 1635 txp->tx_txreq.flags |= NETTXF_csum_blank; 1636 1637 xnfp->xnf_stat_tx_cksum_deferred++; 1638 } 1639 1640 if (head == NULL) { 1641 ASSERT(tail == NULL); 1642 1643 head = txp; 1644 } else { 1645 ASSERT(tail != NULL); 1646 1647 tail->tx_next = txp; 1648 } 1649 tail = txp; 1650 1651 mp = mp->b_next; 1652 prepared++; 1653 1654 /* 1655 * There is no point in preparing more than 1656 * NET_TX_RING_SIZE, as we won't be able to push them 1657 * into the ring in one go and would hence have to 1658 * un-prepare the extra. 
1659 */ 1660 if (prepared == NET_TX_RING_SIZE) 1661 break; 1662 } 1663 1664 DTRACE_PROBE1(xnf_send_prepared, int, prepared); 1665 1666 if (mp != NULL) { 1667 #ifdef XNF_DEBUG 1668 int notprepared = 0; 1669 mblk_t *l = mp; 1670 1671 while (l != NULL) { 1672 notprepared++; 1673 l = l->b_next; 1674 } 1675 1676 DTRACE_PROBE1(xnf_send_notprepared, int, notprepared); 1677 #else /* !XNF_DEBUG */ 1678 DTRACE_PROBE1(xnf_send_notprepared, int, -1); 1679 #endif /* XNF_DEBUG */ 1680 } 1681 1682 /* 1683 * Push the packets we have prepared into the ring. They may 1684 * not all go. 1685 */ 1686 if (head != NULL) 1687 head = tx_push_packets(xnfp, head); 1688 1689 /* 1690 * If some packets that we prepared were not sent, unprepare 1691 * them and add them back to the head of those we didn't 1692 * prepare. 1693 */ 1694 { 1695 xnf_txbuf_t *loop; 1696 mblk_t *mp_head, *mp_tail; 1697 int unprepared = 0; 1698 1699 mp_head = mp_tail = NULL; 1700 loop = head; 1701 1702 while (loop != NULL) { 1703 xnf_txbuf_t *next = loop->tx_next; 1704 1705 if (loop->tx_bdesc == NULL) { 1706 (void) gnttab_end_foreign_access_ref( 1707 loop->tx_txreq.gref, 1); 1708 gref_put(xnfp, loop->tx_txreq.gref); 1709 (void) ddi_dma_unbind_handle( 1710 loop->tx_dma_handle); 1711 } else { 1712 xnf_buf_put(xnfp, loop->tx_bdesc, B_TRUE); 1713 } 1714 1715 ASSERT(loop->tx_mp != NULL); 1716 if (mp_head == NULL) 1717 mp_head = loop->tx_mp; 1718 mp_tail = loop->tx_mp; 1719 1720 kmem_cache_free(xnfp->xnf_tx_buf_cache, loop); 1721 loop = next; 1722 unprepared++; 1723 } 1724 1725 if (mp_tail == NULL) { 1726 ASSERT(mp_head == NULL); 1727 } else { 1728 ASSERT(mp_head != NULL); 1729 1730 mp_tail->b_next = mp; 1731 mp = mp_head; 1732 } 1733 1734 DTRACE_PROBE1(xnf_send_unprepared, int, unprepared); 1735 } 1736 1737 /* 1738 * If any mblks are left then we have deferred for some reason 1739 * and need to ask for a re-schedule later. This is typically 1740 * due to the ring filling. 1741 */ 1742 if (mp != NULL) { 1743 mutex_enter(&xnfp->xnf_schedlock); 1744 xnfp->xnf_need_sched = B_TRUE; 1745 mutex_exit(&xnfp->xnf_schedlock); 1746 1747 xnfp->xnf_stat_tx_defer++; 1748 } 1749 1750 return (mp); 1751 } 1752 1753 /* 1754 * Notification of RX packets. Currently no TX-complete interrupt is 1755 * used, as we clean the TX ring lazily. 1756 */ 1757 static uint_t 1758 xnf_intr(caddr_t arg) 1759 { 1760 xnf_t *xnfp = (xnf_t *)arg; 1761 mblk_t *mp; 1762 boolean_t need_sched, clean_ring; 1763 1764 mutex_enter(&xnfp->xnf_rxlock); 1765 1766 /* 1767 * Interrupts before we are connected are spurious. 1768 */ 1769 if (!xnfp->xnf_connected) { 1770 mutex_exit(&xnfp->xnf_rxlock); 1771 xnfp->xnf_stat_unclaimed_interrupts++; 1772 return (DDI_INTR_UNCLAIMED); 1773 } 1774 1775 /* 1776 * Receive side processing. 1777 */ 1778 do { 1779 /* 1780 * Collect buffers from the ring. 1781 */ 1782 xnf_rx_collect(xnfp); 1783 1784 /* 1785 * Interrupt me when the next receive buffer is consumed. 1786 */ 1787 xnfp->xnf_rx_ring.sring->rsp_event = 1788 xnfp->xnf_rx_ring.rsp_cons + 1; 1789 xen_mb(); 1790 1791 } while (RING_HAS_UNCONSUMED_RESPONSES(&xnfp->xnf_rx_ring)); 1792 1793 if (xnfp->xnf_rx_new_buffers_posted) { 1794 boolean_t notify; 1795 1796 /* 1797 * Indicate to the peer that we have re-filled the 1798 * receive ring, if it cares. 
1799 */ 1800 /* LINTED: constant in conditional context */ 1801 RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&xnfp->xnf_rx_ring, notify); 1802 if (notify) 1803 ec_notify_via_evtchn(xnfp->xnf_evtchn); 1804 xnfp->xnf_rx_new_buffers_posted = B_FALSE; 1805 } 1806 1807 mp = xnfp->xnf_rx_head; 1808 xnfp->xnf_rx_head = xnfp->xnf_rx_tail = NULL; 1809 1810 xnfp->xnf_stat_interrupts++; 1811 mutex_exit(&xnfp->xnf_rxlock); 1812 1813 if (mp != NULL) 1814 mac_rx(xnfp->xnf_mh, NULL, mp); 1815 1816 /* 1817 * Transmit side processing. 1818 * 1819 * If a previous transmit attempt failed or we have pending 1820 * multicast requests, clean the ring. 1821 * 1822 * If we previously stalled transmission and cleaning produces 1823 * some free slots, tell upstream to attempt sending again. 1824 * 1825 * The odd style is to avoid acquiring xnf_txlock unless we 1826 * will actually look inside the tx machinery. 1827 */ 1828 mutex_enter(&xnfp->xnf_schedlock); 1829 need_sched = xnfp->xnf_need_sched; 1830 clean_ring = need_sched || (xnfp->xnf_pending_multicast > 0); 1831 mutex_exit(&xnfp->xnf_schedlock); 1832 1833 if (clean_ring) { 1834 int free_slots; 1835 1836 mutex_enter(&xnfp->xnf_txlock); 1837 free_slots = tx_slots_get(xnfp, 0, B_FALSE); 1838 1839 if (need_sched && (free_slots > 0)) { 1840 mutex_enter(&xnfp->xnf_schedlock); 1841 xnfp->xnf_need_sched = B_FALSE; 1842 mutex_exit(&xnfp->xnf_schedlock); 1843 1844 mac_tx_update(xnfp->xnf_mh); 1845 } 1846 mutex_exit(&xnfp->xnf_txlock); 1847 } 1848 1849 return (DDI_INTR_CLAIMED); 1850 } 1851 1852 /* 1853 * xnf_start() -- start the board receiving and enable interrupts. 1854 */ 1855 static int 1856 xnf_start(void *arg) 1857 { 1858 xnf_t *xnfp = arg; 1859 1860 #ifdef XNF_DEBUG 1861 if (xnf_debug & XNF_DEBUG_TRACE) 1862 printf("xnf%d start(0x%p)\n", 1863 ddi_get_instance(xnfp->xnf_devinfo), (void *)xnfp); 1864 #endif 1865 1866 mutex_enter(&xnfp->xnf_rxlock); 1867 mutex_enter(&xnfp->xnf_txlock); 1868 1869 /* Accept packets from above. */ 1870 xnfp->xnf_running = B_TRUE; 1871 1872 mutex_exit(&xnfp->xnf_txlock); 1873 mutex_exit(&xnfp->xnf_rxlock); 1874 1875 return (0); 1876 } 1877 1878 /* xnf_stop() - disable hardware */ 1879 static void 1880 xnf_stop(void *arg) 1881 { 1882 xnf_t *xnfp = arg; 1883 1884 #ifdef XNF_DEBUG 1885 if (xnf_debug & XNF_DEBUG_TRACE) 1886 printf("xnf%d stop(0x%p)\n", 1887 ddi_get_instance(xnfp->xnf_devinfo), (void *)xnfp); 1888 #endif 1889 1890 mutex_enter(&xnfp->xnf_rxlock); 1891 mutex_enter(&xnfp->xnf_txlock); 1892 1893 xnfp->xnf_running = B_FALSE; 1894 1895 mutex_exit(&xnfp->xnf_txlock); 1896 mutex_exit(&xnfp->xnf_rxlock); 1897 } 1898 1899 /* 1900 * Hang buffer `bdesc' on the RX ring. 1901 */ 1902 static void 1903 xnf_rxbuf_hang(xnf_t *xnfp, xnf_buf_t *bdesc) 1904 { 1905 netif_rx_request_t *reqp; 1906 RING_IDX hang_ix; 1907 1908 ASSERT(MUTEX_HELD(&xnfp->xnf_rxlock)); 1909 1910 reqp = RING_GET_REQUEST(&xnfp->xnf_rx_ring, 1911 xnfp->xnf_rx_ring.req_prod_pvt); 1912 hang_ix = (RING_IDX) (reqp - RING_GET_REQUEST(&xnfp->xnf_rx_ring, 0)); 1913 ASSERT(xnfp->xnf_rx_pkt_info[hang_ix] == NULL); 1914 1915 reqp->id = bdesc->id = hang_ix; 1916 reqp->gref = bdesc->grant_ref; 1917 1918 xnfp->xnf_rx_pkt_info[hang_ix] = bdesc; 1919 xnfp->xnf_rx_ring.req_prod_pvt++; 1920 1921 xnfp->xnf_rx_new_buffers_posted = B_TRUE; 1922 } 1923 1924 /* 1925 * Collect packets from the RX ring, storing them in `xnfp' for later 1926 * use. 
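 *
 * A hedged sketch of the head/tail append idiom used by
 * xnf_rx_collect() below is given here; the helper name is
 * hypothetical and not part of the driver.
 */

/*
 * Illustrative sketch only: append `mp' to a singly-linked chain of
 * mblks threaded through b_next, tracking both ends of the chain.
 */
static void
xnf_example_mblk_append(mblk_t **headp, mblk_t **tailp, mblk_t *mp)
{
	if (*headp == NULL)
		*headp = mp;		/* first entry starts the chain */
	else
		(*tailp)->b_next = mp;	/* otherwise link after the tail */
	*tailp = mp;
}

/*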
1927 */ 1928 static void 1929 xnf_rx_collect(xnf_t *xnfp) 1930 { 1931 mblk_t *head, *tail; 1932 1933 ASSERT(MUTEX_HELD(&xnfp->xnf_rxlock)); 1934 1935 /* 1936 * Loop over unconsumed responses: 1937 * 1. get a response 1938 * 2. take corresponding buffer off recv. ring 1939 * 3. indicate this by setting slot to NULL 1940 * 4. create a new message and 1941 * 5. copy data in, adjust ptr 1942 */ 1943 1944 head = tail = NULL; 1945 1946 while (RING_HAS_UNCONSUMED_RESPONSES(&xnfp->xnf_rx_ring)) { 1947 netif_rx_response_t *rxpkt; 1948 xnf_buf_t *bdesc; 1949 ssize_t len; 1950 size_t off; 1951 mblk_t *mp = NULL; 1952 boolean_t hwcsum = B_FALSE; 1953 grant_ref_t ref; 1954 1955 /* 1. */ 1956 rxpkt = RING_GET_RESPONSE(&xnfp->xnf_rx_ring, 1957 xnfp->xnf_rx_ring.rsp_cons); 1958 1959 DTRACE_PROBE4(xnf_rx_got_rsp, int, (int)rxpkt->id, 1960 int, (int)rxpkt->offset, 1961 int, (int)rxpkt->flags, 1962 int, (int)rxpkt->status); 1963 1964 /* 1965 * 2. 1966 */ 1967 bdesc = xnfp->xnf_rx_pkt_info[rxpkt->id]; 1968 1969 /* 1970 * 3. 1971 */ 1972 xnfp->xnf_rx_pkt_info[rxpkt->id] = NULL; 1973 ASSERT(bdesc->id == rxpkt->id); 1974 1975 ref = bdesc->grant_ref; 1976 off = rxpkt->offset; 1977 len = rxpkt->status; 1978 1979 if (!xnfp->xnf_running) { 1980 DTRACE_PROBE4(xnf_rx_not_running, 1981 int, rxpkt->status, 1982 char *, bdesc->buf, int, rxpkt->offset, 1983 char *, ((char *)bdesc->buf) + rxpkt->offset); 1984 1985 xnfp->xnf_stat_drop++; 1986 1987 } else if (len <= 0) { 1988 DTRACE_PROBE4(xnf_rx_pkt_status_negative, 1989 int, rxpkt->status, 1990 char *, bdesc->buf, int, rxpkt->offset, 1991 char *, ((char *)bdesc->buf) + rxpkt->offset); 1992 1993 xnfp->xnf_stat_errrx++; 1994 1995 switch (len) { 1996 case 0: 1997 xnfp->xnf_stat_runt++; 1998 break; 1999 case NETIF_RSP_ERROR: 2000 xnfp->xnf_stat_mac_rcv_error++; 2001 break; 2002 case NETIF_RSP_DROPPED: 2003 xnfp->xnf_stat_norxbuf++; 2004 break; 2005 } 2006 2007 } else if (bdesc->grant_ref == INVALID_GRANT_REF) { 2008 cmn_err(CE_WARN, "Bad rx grant reference %d " 2009 "from domain %d", ref, 2010 xvdi_get_oeid(xnfp->xnf_devinfo)); 2011 2012 } else if ((off + len) > PAGESIZE) { 2013 cmn_err(CE_WARN, "Rx packet overflows page " 2014 "(offset %ld, length %ld) from domain %d", 2015 off, len, xvdi_get_oeid(xnfp->xnf_devinfo)); 2016 } else { 2017 xnf_buf_t *nbuf = NULL; 2018 2019 DTRACE_PROBE4(xnf_rx_packet, int, len, 2020 char *, bdesc->buf, int, off, 2021 char *, ((char *)bdesc->buf) + off); 2022 2023 ASSERT(off + len <= PAGEOFFSET); 2024 2025 if (rxpkt->flags & NETRXF_data_validated) 2026 hwcsum = B_TRUE; 2027 2028 /* 2029 * If the packet is below a pre-determined 2030 * size we will copy data out rather than 2031 * replace it. 2032 */ 2033 if (len > xnf_rx_copy_limit) 2034 nbuf = xnf_buf_get(xnfp, KM_NOSLEEP, B_FALSE); 2035 2036 /* 2037 * If we have a replacement buffer, attempt to 2038 * wrap the existing one with an mblk_t in 2039 * order that the upper layers of the stack 2040 * might use it directly. 2041 */ 2042 if (nbuf != NULL) { 2043 mp = desballoc((unsigned char *)bdesc->buf, 2044 bdesc->len, 0, &bdesc->free_rtn); 2045 if (mp == NULL) { 2046 xnfp->xnf_stat_rx_desballoc_fail++; 2047 xnfp->xnf_stat_norxbuf++; 2048 2049 xnf_buf_put(xnfp, nbuf, B_FALSE); 2050 nbuf = NULL; 2051 } else { 2052 mp->b_rptr = mp->b_rptr + off; 2053 mp->b_wptr = mp->b_rptr + len; 2054 2055 /* 2056 * Release the grant reference 2057 * associated with this buffer 2058 * - they are scarce and the 2059 * upper layers of the stack 2060 * don't need it. 
2061 */ 2062 (void) gnttab_end_foreign_access_ref( 2063 bdesc->grant_ref, 0); 2064 gref_put(xnfp, bdesc->grant_ref); 2065 bdesc->grant_ref = INVALID_GRANT_REF; 2066 2067 bdesc = nbuf; 2068 } 2069 } 2070 2071 if (nbuf == NULL) { 2072 /* 2073 * No replacement buffer allocated - 2074 * attempt to copy the data out and 2075 * re-hang the existing buffer. 2076 */ 2077 2078 /* 4. */ 2079 mp = allocb(len, BPRI_MED); 2080 if (mp == NULL) { 2081 xnfp->xnf_stat_rx_allocb_fail++; 2082 xnfp->xnf_stat_norxbuf++; 2083 } else { 2084 /* 5. */ 2085 bcopy(bdesc->buf + off, mp->b_wptr, 2086 len); 2087 mp->b_wptr += len; 2088 } 2089 } 2090 } 2091 2092 /* Re-hang the buffer. */ 2093 xnf_rxbuf_hang(xnfp, bdesc); 2094 2095 if (mp != NULL) { 2096 if (hwcsum) { 2097 /* 2098 * If the peer says that the data has 2099 * been validated then we declare that 2100 * the full checksum has been 2101 * verified. 2102 * 2103 * We don't look at the "checksum 2104 * blank" flag, and hence could have a 2105 * packet here that we are asserting 2106 * is good with a blank checksum. 2107 */ 2108 mac_hcksum_set(mp, 0, 0, 0, 0, 2109 HCK_FULLCKSUM_OK); 2110 xnfp->xnf_stat_rx_cksum_no_need++; 2111 } 2112 if (head == NULL) { 2113 ASSERT(tail == NULL); 2114 2115 head = mp; 2116 } else { 2117 ASSERT(tail != NULL); 2118 2119 tail->b_next = mp; 2120 } 2121 tail = mp; 2122 2123 ASSERT(mp->b_next == NULL); 2124 2125 xnfp->xnf_stat_ipackets++; 2126 xnfp->xnf_stat_rbytes += len; 2127 } 2128 2129 xnfp->xnf_rx_ring.rsp_cons++; 2130 } 2131 2132 /* 2133 * Store the mblks we have collected. 2134 */ 2135 if (head != NULL) { 2136 ASSERT(tail != NULL); 2137 2138 if (xnfp->xnf_rx_head == NULL) { 2139 ASSERT(xnfp->xnf_rx_tail == NULL); 2140 2141 xnfp->xnf_rx_head = head; 2142 } else { 2143 ASSERT(xnfp->xnf_rx_tail != NULL); 2144 2145 xnfp->xnf_rx_tail->b_next = head; 2146 } 2147 xnfp->xnf_rx_tail = tail; 2148 } 2149 } 2150 2151 /* 2152 * xnf_alloc_dma_resources() -- initialize the drivers structures 2153 */ 2154 static int 2155 xnf_alloc_dma_resources(xnf_t *xnfp) 2156 { 2157 dev_info_t *devinfo = xnfp->xnf_devinfo; 2158 size_t len; 2159 ddi_dma_cookie_t dma_cookie; 2160 uint_t ncookies; 2161 int rc; 2162 caddr_t rptr; 2163 2164 /* 2165 * The code below allocates all the DMA data structures that 2166 * need to be released when the driver is detached. 2167 * 2168 * Allocate page for the transmit descriptor ring. 
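 * For each ring the sequence is: ddi_dma_alloc_handle() to create a
 * handle, ddi_dma_mem_alloc() to obtain a page of DMA-able memory,
 * ddi_dma_addr_bind_handle() to learn its physical address, and then
 * SHARED_RING_INIT()/FRONT_RING_INIT() to lay out the shared ring in
 * that page.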
2169 */ 2170 if (ddi_dma_alloc_handle(devinfo, &ringbuf_dma_attr, 2171 DDI_DMA_SLEEP, 0, &xnfp->xnf_tx_ring_dma_handle) != DDI_SUCCESS) 2172 goto alloc_error; 2173 2174 if (ddi_dma_mem_alloc(xnfp->xnf_tx_ring_dma_handle, 2175 PAGESIZE, &accattr, DDI_DMA_CONSISTENT, 2176 DDI_DMA_SLEEP, 0, &rptr, &len, 2177 &xnfp->xnf_tx_ring_dma_acchandle) != DDI_SUCCESS) { 2178 ddi_dma_free_handle(&xnfp->xnf_tx_ring_dma_handle); 2179 xnfp->xnf_tx_ring_dma_handle = NULL; 2180 goto alloc_error; 2181 } 2182 2183 if ((rc = ddi_dma_addr_bind_handle(xnfp->xnf_tx_ring_dma_handle, NULL, 2184 rptr, PAGESIZE, DDI_DMA_RDWR | DDI_DMA_CONSISTENT, 2185 DDI_DMA_SLEEP, 0, &dma_cookie, &ncookies)) != DDI_DMA_MAPPED) { 2186 ddi_dma_mem_free(&xnfp->xnf_tx_ring_dma_acchandle); 2187 ddi_dma_free_handle(&xnfp->xnf_tx_ring_dma_handle); 2188 xnfp->xnf_tx_ring_dma_handle = NULL; 2189 xnfp->xnf_tx_ring_dma_acchandle = NULL; 2190 if (rc == DDI_DMA_NORESOURCES) 2191 goto alloc_error; 2192 else 2193 goto error; 2194 } 2195 2196 ASSERT(ncookies == 1); 2197 bzero(rptr, PAGESIZE); 2198 /* LINTED: constant in conditional context */ 2199 SHARED_RING_INIT((netif_tx_sring_t *)rptr); 2200 /* LINTED: constant in conditional context */ 2201 FRONT_RING_INIT(&xnfp->xnf_tx_ring, (netif_tx_sring_t *)rptr, PAGESIZE); 2202 xnfp->xnf_tx_ring_phys_addr = dma_cookie.dmac_laddress; 2203 2204 /* 2205 * Allocate page for the receive descriptor ring. 2206 */ 2207 if (ddi_dma_alloc_handle(devinfo, &ringbuf_dma_attr, 2208 DDI_DMA_SLEEP, 0, &xnfp->xnf_rx_ring_dma_handle) != DDI_SUCCESS) 2209 goto alloc_error; 2210 2211 if (ddi_dma_mem_alloc(xnfp->xnf_rx_ring_dma_handle, 2212 PAGESIZE, &accattr, DDI_DMA_CONSISTENT, 2213 DDI_DMA_SLEEP, 0, &rptr, &len, 2214 &xnfp->xnf_rx_ring_dma_acchandle) != DDI_SUCCESS) { 2215 ddi_dma_free_handle(&xnfp->xnf_rx_ring_dma_handle); 2216 xnfp->xnf_rx_ring_dma_handle = NULL; 2217 goto alloc_error; 2218 } 2219 2220 if ((rc = ddi_dma_addr_bind_handle(xnfp->xnf_rx_ring_dma_handle, NULL, 2221 rptr, PAGESIZE, DDI_DMA_RDWR | DDI_DMA_CONSISTENT, 2222 DDI_DMA_SLEEP, 0, &dma_cookie, &ncookies)) != DDI_DMA_MAPPED) { 2223 ddi_dma_mem_free(&xnfp->xnf_rx_ring_dma_acchandle); 2224 ddi_dma_free_handle(&xnfp->xnf_rx_ring_dma_handle); 2225 xnfp->xnf_rx_ring_dma_handle = NULL; 2226 xnfp->xnf_rx_ring_dma_acchandle = NULL; 2227 if (rc == DDI_DMA_NORESOURCES) 2228 goto alloc_error; 2229 else 2230 goto error; 2231 } 2232 2233 ASSERT(ncookies == 1); 2234 bzero(rptr, PAGESIZE); 2235 /* LINTED: constant in conditional context */ 2236 SHARED_RING_INIT((netif_rx_sring_t *)rptr); 2237 /* LINTED: constant in conditional context */ 2238 FRONT_RING_INIT(&xnfp->xnf_rx_ring, (netif_rx_sring_t *)rptr, PAGESIZE); 2239 xnfp->xnf_rx_ring_phys_addr = dma_cookie.dmac_laddress; 2240 2241 return (DDI_SUCCESS); 2242 2243 alloc_error: 2244 cmn_err(CE_WARN, "xnf%d: could not allocate enough DMA memory", 2245 ddi_get_instance(xnfp->xnf_devinfo)); 2246 error: 2247 xnf_release_dma_resources(xnfp); 2248 return (DDI_FAILURE); 2249 } 2250 2251 /* 2252 * Release all DMA resources in the opposite order from acquisition 2253 */ 2254 static void 2255 xnf_release_dma_resources(xnf_t *xnfp) 2256 { 2257 int i; 2258 2259 /* 2260 * Free receive buffers which are currently associated with 2261 * descriptors. 
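	 *
	 * Teardown happens in the reverse order of
	 * xnf_alloc_dma_resources(); for each ring the pattern used
	 * below is:
	 *
	 *   (void) ddi_dma_unbind_handle(handle);
	 *   ddi_dma_mem_free(&acchandle);
	 *   ddi_dma_free_handle(&handle);
	 *
	 * where "handle" and "acchandle" stand for the per-ring
	 * xnf_*_ring_dma_handle and xnf_*_ring_dma_acchandle fields.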
2262 */ 2263 mutex_enter(&xnfp->xnf_rxlock); 2264 for (i = 0; i < NET_RX_RING_SIZE; i++) { 2265 xnf_buf_t *bp; 2266 2267 if ((bp = xnfp->xnf_rx_pkt_info[i]) == NULL) 2268 continue; 2269 xnfp->xnf_rx_pkt_info[i] = NULL; 2270 xnf_buf_put(xnfp, bp, B_FALSE); 2271 } 2272 mutex_exit(&xnfp->xnf_rxlock); 2273 2274 /* Free the receive ring buffer. */ 2275 if (xnfp->xnf_rx_ring_dma_acchandle != NULL) { 2276 (void) ddi_dma_unbind_handle(xnfp->xnf_rx_ring_dma_handle); 2277 ddi_dma_mem_free(&xnfp->xnf_rx_ring_dma_acchandle); 2278 ddi_dma_free_handle(&xnfp->xnf_rx_ring_dma_handle); 2279 xnfp->xnf_rx_ring_dma_acchandle = NULL; 2280 } 2281 /* Free the transmit ring buffer. */ 2282 if (xnfp->xnf_tx_ring_dma_acchandle != NULL) { 2283 (void) ddi_dma_unbind_handle(xnfp->xnf_tx_ring_dma_handle); 2284 ddi_dma_mem_free(&xnfp->xnf_tx_ring_dma_acchandle); 2285 ddi_dma_free_handle(&xnfp->xnf_tx_ring_dma_handle); 2286 xnfp->xnf_tx_ring_dma_acchandle = NULL; 2287 } 2288 2289 } 2290 2291 /* 2292 * Release any packets and associated structures used by the TX ring. 2293 */ 2294 static void 2295 xnf_release_mblks(xnf_t *xnfp) 2296 { 2297 RING_IDX i; 2298 xnf_txid_t *tidp; 2299 2300 for (i = 0, tidp = &xnfp->xnf_tx_pkt_id[0]; 2301 i < NET_TX_RING_SIZE; 2302 i++, tidp++) { 2303 xnf_txbuf_t *txp = tidp->txbuf; 2304 2305 if (txp != NULL) { 2306 ASSERT(txp->tx_mp != NULL); 2307 freemsg(txp->tx_mp); 2308 2309 txid_put(xnfp, tidp); 2310 kmem_cache_free(xnfp->xnf_tx_buf_cache, txp); 2311 } 2312 } 2313 } 2314 2315 static int 2316 xnf_buf_constructor(void *buf, void *arg, int kmflag) 2317 { 2318 int (*ddiflags)(caddr_t) = DDI_DMA_SLEEP; 2319 xnf_buf_t *bdesc = buf; 2320 xnf_t *xnfp = arg; 2321 ddi_dma_cookie_t dma_cookie; 2322 uint_t ncookies; 2323 size_t len; 2324 2325 if (kmflag & KM_NOSLEEP) 2326 ddiflags = DDI_DMA_DONTWAIT; 2327 2328 /* Allocate a DMA access handle for the buffer. */ 2329 if (ddi_dma_alloc_handle(xnfp->xnf_devinfo, &buf_dma_attr, 2330 ddiflags, 0, &bdesc->dma_handle) != DDI_SUCCESS) 2331 goto failure; 2332 2333 /* Allocate DMA-able memory for buffer. */ 2334 if (ddi_dma_mem_alloc(bdesc->dma_handle, 2335 PAGESIZE, &data_accattr, DDI_DMA_STREAMING, ddiflags, 0, 2336 &bdesc->buf, &len, &bdesc->acc_handle) != DDI_SUCCESS) 2337 goto failure_1; 2338 2339 /* Bind to virtual address of buffer to get physical address. */ 2340 if (ddi_dma_addr_bind_handle(bdesc->dma_handle, NULL, 2341 bdesc->buf, len, DDI_DMA_RDWR | DDI_DMA_STREAMING, 2342 ddiflags, 0, &dma_cookie, &ncookies) != DDI_DMA_MAPPED) 2343 goto failure_2; 2344 ASSERT(ncookies == 1); 2345 2346 bdesc->free_rtn.free_func = xnf_buf_recycle; 2347 bdesc->free_rtn.free_arg = (caddr_t)bdesc; 2348 bdesc->xnfp = xnfp; 2349 bdesc->buf_phys = dma_cookie.dmac_laddress; 2350 bdesc->buf_mfn = pfn_to_mfn(xnf_btop(bdesc->buf_phys)); 2351 bdesc->len = dma_cookie.dmac_size; 2352 bdesc->grant_ref = INVALID_GRANT_REF; 2353 bdesc->gen = xnfp->xnf_gen; 2354 2355 atomic_add_64(&xnfp->xnf_stat_buf_allocated, 1); 2356 2357 return (0); 2358 2359 failure_2: 2360 ddi_dma_mem_free(&bdesc->acc_handle); 2361 2362 failure_1: 2363 ddi_dma_free_handle(&bdesc->dma_handle); 2364 2365 failure: 2366 2367 ASSERT(kmflag & KM_NOSLEEP); /* Cannot fail for KM_SLEEP. 
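	 * KM_SLEEP maps to DDI_DMA_SLEEP above, so each of the
	 * ddi_dma_*() steps waits for resources rather than failing.
	 *
	 * A minimal sketch of how this constructor is assumed to be
	 * wired up elsewhere in the driver (the cache name is
	 * illustrative only):
	 *
	 *   xnfp->xnf_buf_cache = kmem_cache_create("xnf_buf_cache",
	 *       sizeof (xnf_buf_t), 0, xnf_buf_constructor,
	 *       xnf_buf_destructor, NULL, xnfp, NULL, 0);
	 *
	 *   bufp = kmem_cache_alloc(xnfp->xnf_buf_cache, KM_NOSLEEP);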
*/ 2368 return (-1); 2369 } 2370 2371 static void 2372 xnf_buf_destructor(void *buf, void *arg) 2373 { 2374 xnf_buf_t *bdesc = buf; 2375 xnf_t *xnfp = arg; 2376 2377 (void) ddi_dma_unbind_handle(bdesc->dma_handle); 2378 ddi_dma_mem_free(&bdesc->acc_handle); 2379 ddi_dma_free_handle(&bdesc->dma_handle); 2380 2381 atomic_add_64(&xnfp->xnf_stat_buf_allocated, -1); 2382 } 2383 2384 static xnf_buf_t * 2385 xnf_buf_get(xnf_t *xnfp, int flags, boolean_t readonly) 2386 { 2387 grant_ref_t gref; 2388 xnf_buf_t *bufp; 2389 2390 /* 2391 * Usually grant references are more scarce than memory, so we 2392 * attempt to acquire a grant reference first. 2393 */ 2394 gref = gref_get(xnfp); 2395 if (gref == INVALID_GRANT_REF) 2396 return (NULL); 2397 2398 bufp = kmem_cache_alloc(xnfp->xnf_buf_cache, flags); 2399 if (bufp == NULL) { 2400 gref_put(xnfp, gref); 2401 return (NULL); 2402 } 2403 2404 ASSERT(bufp->grant_ref == INVALID_GRANT_REF); 2405 2406 bufp->grant_ref = gref; 2407 2408 if (bufp->gen != xnfp->xnf_gen) 2409 xnf_buf_refresh(bufp); 2410 2411 gnttab_grant_foreign_access_ref(bufp->grant_ref, 2412 xvdi_get_oeid(bufp->xnfp->xnf_devinfo), 2413 bufp->buf_mfn, readonly ? 1 : 0); 2414 2415 atomic_add_64(&xnfp->xnf_stat_buf_outstanding, 1); 2416 2417 return (bufp); 2418 } 2419 2420 static void 2421 xnf_buf_put(xnf_t *xnfp, xnf_buf_t *bufp, boolean_t readonly) 2422 { 2423 if (bufp->grant_ref != INVALID_GRANT_REF) { 2424 (void) gnttab_end_foreign_access_ref( 2425 bufp->grant_ref, readonly ? 1 : 0); 2426 gref_put(xnfp, bufp->grant_ref); 2427 bufp->grant_ref = INVALID_GRANT_REF; 2428 } 2429 2430 kmem_cache_free(xnfp->xnf_buf_cache, bufp); 2431 2432 atomic_add_64(&xnfp->xnf_stat_buf_outstanding, -1); 2433 } 2434 2435 /* 2436 * Refresh any cached data about a buffer after resume. 2437 */ 2438 static void 2439 xnf_buf_refresh(xnf_buf_t *bdesc) 2440 { 2441 bdesc->buf_mfn = pfn_to_mfn(xnf_btop(bdesc->buf_phys)); 2442 bdesc->gen = bdesc->xnfp->xnf_gen; 2443 } 2444 2445 /* 2446 * Streams `freeb' routine for `xnf_buf_t' when used as transmit 2447 * look-aside buffers. 2448 */ 2449 static void 2450 xnf_buf_recycle(xnf_buf_t *bdesc) 2451 { 2452 xnf_t *xnfp = bdesc->xnfp; 2453 2454 xnf_buf_put(xnfp, bdesc, B_TRUE); 2455 } 2456 2457 static int 2458 xnf_tx_buf_constructor(void *buf, void *arg, int kmflag) 2459 { 2460 int (*ddiflags)(caddr_t) = DDI_DMA_SLEEP; 2461 xnf_txbuf_t *txp = buf; 2462 xnf_t *xnfp = arg; 2463 2464 if (kmflag & KM_NOSLEEP) 2465 ddiflags = DDI_DMA_DONTWAIT; 2466 2467 if (ddi_dma_alloc_handle(xnfp->xnf_devinfo, &buf_dma_attr, 2468 ddiflags, 0, &txp->tx_dma_handle) != DDI_SUCCESS) { 2469 ASSERT(kmflag & KM_NOSLEEP); /* Cannot fail for KM_SLEEP. */ 2470 return (-1); 2471 } 2472 2473 return (0); 2474 } 2475 2476 static void 2477 xnf_tx_buf_destructor(void *buf, void *arg) 2478 { 2479 _NOTE(ARGUNUSED(arg)); 2480 xnf_txbuf_t *txp = buf; 2481 2482 ddi_dma_free_handle(&txp->tx_dma_handle); 2483 } 2484 2485 /* 2486 * Statistics. 
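 *
 * The xnf_aux_statistics[] array below lists the user-visible names
 * of the "aux_statistics" kstat created by xnf_kstat_init();
 * xnf_kstat_aux_update() fills in the values in exactly the same
 * order.  To export a new counter, append its name here and add a
 * matching assignment at the same position in xnf_kstat_aux_update(),
 * for example (the "tx_foo" counter is purely illustrative):
 *
 *	"tx_foo",
 *
 * paired with
 *
 *	(knp++)->value.ui64 = xnfp->xnf_stat_tx_foo;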
2487 */ 2488 static char *xnf_aux_statistics[] = { 2489 "tx_cksum_deferred", 2490 "rx_cksum_no_need", 2491 "interrupts", 2492 "unclaimed_interrupts", 2493 "tx_pullup", 2494 "tx_pagebndry", 2495 "tx_attempt", 2496 "buf_allocated", 2497 "buf_outstanding", 2498 "gref_outstanding", 2499 "gref_failure", 2500 "gref_peak", 2501 "rx_allocb_fail", 2502 "rx_desballoc_fail", 2503 }; 2504 2505 static int 2506 xnf_kstat_aux_update(kstat_t *ksp, int flag) 2507 { 2508 xnf_t *xnfp; 2509 kstat_named_t *knp; 2510 2511 if (flag != KSTAT_READ) 2512 return (EACCES); 2513 2514 xnfp = ksp->ks_private; 2515 knp = ksp->ks_data; 2516 2517 /* 2518 * Assignment order must match that of the names in 2519 * xnf_aux_statistics. 2520 */ 2521 (knp++)->value.ui64 = xnfp->xnf_stat_tx_cksum_deferred; 2522 (knp++)->value.ui64 = xnfp->xnf_stat_rx_cksum_no_need; 2523 2524 (knp++)->value.ui64 = xnfp->xnf_stat_interrupts; 2525 (knp++)->value.ui64 = xnfp->xnf_stat_unclaimed_interrupts; 2526 (knp++)->value.ui64 = xnfp->xnf_stat_tx_pullup; 2527 (knp++)->value.ui64 = xnfp->xnf_stat_tx_pagebndry; 2528 (knp++)->value.ui64 = xnfp->xnf_stat_tx_attempt; 2529 2530 (knp++)->value.ui64 = xnfp->xnf_stat_buf_allocated; 2531 (knp++)->value.ui64 = xnfp->xnf_stat_buf_outstanding; 2532 (knp++)->value.ui64 = xnfp->xnf_stat_gref_outstanding; 2533 (knp++)->value.ui64 = xnfp->xnf_stat_gref_failure; 2534 (knp++)->value.ui64 = xnfp->xnf_stat_gref_peak; 2535 (knp++)->value.ui64 = xnfp->xnf_stat_rx_allocb_fail; 2536 (knp++)->value.ui64 = xnfp->xnf_stat_rx_desballoc_fail; 2537 2538 return (0); 2539 } 2540 2541 static boolean_t 2542 xnf_kstat_init(xnf_t *xnfp) 2543 { 2544 int nstat = sizeof (xnf_aux_statistics) / 2545 sizeof (xnf_aux_statistics[0]); 2546 char **cp = xnf_aux_statistics; 2547 kstat_named_t *knp; 2548 2549 /* 2550 * Create and initialise kstats. 
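	 *
	 * The kstat is published as module "xnf", name
	 * "aux_statistics", class "net", and can be inspected from
	 * userland with kstat(1M), for example (instance 0 is
	 * illustrative):
	 *
	 *   kstat -p xnf:0:aux_statistics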
	 */
	if ((xnfp->xnf_kstat_aux = kstat_create("xnf",
	    ddi_get_instance(xnfp->xnf_devinfo),
	    "aux_statistics", "net", KSTAT_TYPE_NAMED,
	    nstat, 0)) == NULL)
		return (B_FALSE);

	xnfp->xnf_kstat_aux->ks_private = xnfp;
	xnfp->xnf_kstat_aux->ks_update = xnf_kstat_aux_update;

	knp = xnfp->xnf_kstat_aux->ks_data;
	while (nstat > 0) {
		kstat_named_init(knp, *cp, KSTAT_DATA_UINT64);

		knp++;
		cp++;
		nstat--;
	}

	kstat_install(xnfp->xnf_kstat_aux);

	return (B_TRUE);
}

static int
xnf_stat(void *arg, uint_t stat, uint64_t *val)
{
	xnf_t *xnfp = arg;

	mutex_enter(&xnfp->xnf_rxlock);
	mutex_enter(&xnfp->xnf_txlock);

#define	mac_stat(q, r)				\
	case (MAC_STAT_##q):			\
		*val = xnfp->xnf_stat_##r;	\
		break

#define	ether_stat(q, r)			\
	case (ETHER_STAT_##q):			\
		*val = xnfp->xnf_stat_##r;	\
		break

	switch (stat) {

	mac_stat(IPACKETS, ipackets);
	mac_stat(OPACKETS, opackets);
	mac_stat(RBYTES, rbytes);
	mac_stat(OBYTES, obytes);
	mac_stat(NORCVBUF, norxbuf);
	mac_stat(IERRORS, errrx);
	mac_stat(NOXMTBUF, tx_defer);

	ether_stat(MACRCV_ERRORS, mac_rcv_error);
	ether_stat(TOOSHORT_ERRORS, runt);

	/* always claim to be in full duplex mode */
	case ETHER_STAT_LINK_DUPLEX:
		*val = LINK_DUPLEX_FULL;
		break;

	/* always claim to be at 1Gb/s link speed */
	case MAC_STAT_IFSPEED:
		*val = 1000000000ull;
		break;

	default:
		mutex_exit(&xnfp->xnf_txlock);
		mutex_exit(&xnfp->xnf_rxlock);

		return (ENOTSUP);
	}

#undef mac_stat
#undef ether_stat

	mutex_exit(&xnfp->xnf_txlock);
	mutex_exit(&xnfp->xnf_rxlock);

	return (0);
}

static boolean_t
xnf_getcapab(void *arg, mac_capab_t cap, void *cap_data)
{
	_NOTE(ARGUNUSED(arg));

	switch (cap) {
	case MAC_CAPAB_HCKSUM: {
		uint32_t *capab = cap_data;

		/*
		 * Whilst the flag used to communicate with the IO
		 * domain is called "NETTXF_csum_blank", the checksum
		 * in the packet must contain the pseudo-header
		 * checksum and not zero.
		 *
		 * To help out the IO domain, we might use
		 * HCKSUM_INET_PARTIAL. Unfortunately our stack will
		 * then use checksum offload for IPv6 packets, which
		 * the IO domain can't handle.
		 *
		 * As a result, we declare ourselves capable of
		 * HCKSUM_INET_FULL_V4. This means that we receive
		 * IPv4 packets from the stack with a blank checksum
		 * field and must insert the pseudo-header checksum
		 * before passing the packet to the IO domain.
		 */
		*capab = HCKSUM_INET_FULL_V4;
		break;
	}
	default:
		return (B_FALSE);
	}

	return (B_TRUE);
}

/*
 * The state of the peer has changed - react accordingly.
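 *
 * In outline (a summary of the switch below):
 *
 *   XenbusStateInitWait:	read the backend configuration,
 *				connect to the backend and publish our
 *				MAC address;
 *   XenbusStateConnected:	mark ourselves connected, wake any
 *				threads waiting to transmit, kick the
 *				peer, pick up any responses that
 *				arrived before the state change was
 *				seen and report the link as up;
 *   all other states:		ignored.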
 */
static void
oe_state_change(dev_info_t *dip, ddi_eventcookie_t id,
    void *arg, void *impl_data)
{
	_NOTE(ARGUNUSED(id, arg));
	xnf_t *xnfp = ddi_get_driver_private(dip);
	XenbusState new_state = *(XenbusState *)impl_data;

	ASSERT(xnfp != NULL);

	switch (new_state) {
	case XenbusStateUnknown:
	case XenbusStateInitialising:
	case XenbusStateInitialised:
	case XenbusStateClosing:
	case XenbusStateClosed:
	case XenbusStateReconfiguring:
	case XenbusStateReconfigured:
		break;

	case XenbusStateInitWait:
		xnf_read_config(xnfp);

		if (!xnfp->xnf_be_rx_copy) {
			cmn_err(CE_WARN,
			    "The xnf driver requires a dom0 that "
			    "supports 'feature-rx-copy'.");
			(void) xvdi_switch_state(xnfp->xnf_devinfo,
			    XBT_NULL, XenbusStateClosed);
			break;
		}

		/*
		 * Connect to the backend.
		 */
		xnf_be_connect(xnfp);

		/*
		 * Our MAC address as discovered by xnf_read_config().
		 */
		mac_unicst_update(xnfp->xnf_mh, xnfp->xnf_mac_addr);

		break;

	case XenbusStateConnected:
		mutex_enter(&xnfp->xnf_rxlock);
		mutex_enter(&xnfp->xnf_txlock);

		xnfp->xnf_connected = B_TRUE;
		/*
		 * Wake up any threads waiting to send data to the
		 * backend.
		 */
		cv_broadcast(&xnfp->xnf_cv_state);

		mutex_exit(&xnfp->xnf_txlock);
		mutex_exit(&xnfp->xnf_rxlock);

		/*
		 * Kick the peer in case it missed any transmit
		 * requests in the TX ring.
		 */
		ec_notify_via_evtchn(xnfp->xnf_evtchn);

		/*
		 * There may already be completed receive requests in
		 * the ring sent by the backend after it gets connected
		 * but before we see its state change here, so we call
		 * xnf_intr() to handle them, if any.
		 */
		(void) xnf_intr((caddr_t)xnfp);

		/*
		 * Mark the link up now that we are connected.
		 */
		mac_link_update(xnfp->xnf_mh, LINK_STATE_UP);

		/*
		 * Tell the backend about the multicast addresses in
		 * which we are interested.
		 */
		mac_multicast_refresh(xnfp->xnf_mh, NULL, xnfp, B_TRUE);

		break;

	default:
		break;
	}
}
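
/*
 * oe_state_change() is not called directly from this file; the driver
 * is assumed to register it with the xendev framework during attach,
 * roughly as follows (a sketch only - the actual registration lives
 * elsewhere in the driver):
 *
 *	(void) xvdi_add_event_handler(devinfo, XS_OE_STATE,
 *	    oe_state_change, NULL);
 *
 * after which the handler is invoked with the peer's new XenbusState
 * whenever the backend changes state.
 */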