1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2010 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* 28 * 29 * Copyright (c) 2004 Christian Limpach. 30 * All rights reserved. 31 * 32 * Redistribution and use in source and binary forms, with or without 33 * modification, are permitted provided that the following conditions 34 * are met: 35 * 1. Redistributions of source code must retain the above copyright 36 * notice, this list of conditions and the following disclaimer. 37 * 2. Redistributions in binary form must reproduce the above copyright 38 * notice, this list of conditions and the following disclaimer in the 39 * documentation and/or other materials provided with the distribution. 40 * 3. This section intentionally left blank. 41 * 4. The name of the author may not be used to endorse or promote products 42 * derived from this software without specific prior written permission. 43 * 44 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 45 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 46 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 47 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 48 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 49 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 50 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 51 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 52 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 53 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 54 */ 55 /* 56 * Section 3 of the above license was updated in response to bug 6379571. 57 */ 58 59 /* 60 * xnf.c - GLDv3 network driver for domU. 61 */ 62 63 /* 64 * This driver uses four per-instance locks: 65 * 66 * xnf_gref_lock: 67 * 68 * Protects access to the grant reference list stored in 69 * xnf_gref_head. Grant references should be acquired and released 70 * using gref_get() and gref_put() respectively. 71 * 72 * xnf_schedlock: 73 * 74 * Protects: 75 * xnf_need_sched - used to record that a previous transmit attempt 76 * failed (and consequently it will be necessary to call 77 * mac_tx_update() when transmit resources are available). 78 * xnf_pending_multicast - the number of multicast requests that 79 * have been submitted to the backend for which we have not 80 * processed responses. 81 * 82 * xnf_txlock: 83 * 84 * Protects the transmit ring (xnf_tx_ring) and associated 85 * structures (notably xnf_tx_pkt_id and xnf_tx_pkt_id_head). 
86 * 87 * xnf_rxlock: 88 * 89 * Protects the receive ring (xnf_rx_ring) and associated 90 * structures (notably xnf_rx_pkt_info). 91 * 92 * If driver-global state that affects both the transmit and receive 93 * rings is manipulated, both xnf_txlock and xnf_rxlock should be 94 * held, in that order. 95 * 96 * xnf_schedlock is acquired both whilst holding xnf_txlock and 97 * without. It should always be acquired after xnf_txlock if both are 98 * held. 99 * 100 * Notes: 101 * - atomic_add_64() is used to manipulate counters where we require 102 * accuracy. For counters intended only for observation by humans, 103 * post increment/decrement are used instead. 104 */ 105 106 #include <sys/types.h> 107 #include <sys/errno.h> 108 #include <sys/param.h> 109 #include <sys/sysmacros.h> 110 #include <sys/systm.h> 111 #include <sys/stream.h> 112 #include <sys/strsubr.h> 113 #include <sys/strsun.h> 114 #include <sys/conf.h> 115 #include <sys/ddi.h> 116 #include <sys/devops.h> 117 #include <sys/sunddi.h> 118 #include <sys/sunndi.h> 119 #include <sys/dlpi.h> 120 #include <sys/ethernet.h> 121 #include <sys/strsun.h> 122 #include <sys/pattr.h> 123 #include <inet/ip.h> 124 #include <inet/ip_impl.h> 125 #include <sys/gld.h> 126 #include <sys/modctl.h> 127 #include <sys/mac_provider.h> 128 #include <sys/mac_ether.h> 129 #include <sys/bootinfo.h> 130 #include <sys/mach_mmu.h> 131 #ifdef XPV_HVM_DRIVER 132 #include <sys/xpv_support.h> 133 #include <sys/hypervisor.h> 134 #else 135 #include <sys/hypervisor.h> 136 #include <sys/evtchn_impl.h> 137 #include <sys/balloon_impl.h> 138 #endif 139 #include <xen/public/io/netif.h> 140 #include <sys/gnttab.h> 141 #include <xen/sys/xendev.h> 142 #include <sys/sdt.h> 143 #include <sys/note.h> 144 #include <sys/debug.h> 145 146 #include <io/xnf.h> 147 148 #if defined(DEBUG) || defined(__lint) 149 #define XNF_DEBUG 150 #endif 151 152 #ifdef XNF_DEBUG 153 int xnf_debug = 0; 154 xnf_t *xnf_debug_instance = NULL; 155 #endif 156 157 /* 158 * On a 32 bit PAE system physical and machine addresses are larger 159 * than 32 bits. ddi_btop() on such systems take an unsigned long 160 * argument, and so addresses above 4G are truncated before ddi_btop() 161 * gets to see them. To avoid this, code the shift operation here. 162 */ 163 #define xnf_btop(addr) ((addr) >> PAGESHIFT) 164 165 unsigned int xnf_max_tx_frags = 1; 166 167 /* 168 * Should we use the multicast control feature if the backend provides 169 * it? 170 */ 171 boolean_t xnf_multicast_control = B_TRUE; 172 173 /* 174 * Received packets below this size are copied to a new streams buffer 175 * rather than being desballoc'ed. 176 * 177 * This value is chosen to accommodate traffic where there are a large 178 * number of small packets. For data showing a typical distribution, 179 * see: 180 * 181 * Sinha07a: 182 * Rishi Sinha, Christos Papadopoulos, and John 183 * Heidemann. Internet Packet Size Distributions: Some 184 * Observations. Technical Report ISI-TR-2007-643, 185 * USC/Information Sciences Institute, May, 2007. Orignally 186 * released October 2005 as web page 187 * http://netweb.usc.edu/~sinha/pkt-sizes/. 188 * <http://www.isi.edu/~johnh/PAPERS/Sinha07a.html>. 
189 */ 190 size_t xnf_rx_copy_limit = 64; 191 192 #define INVALID_GRANT_HANDLE ((grant_handle_t)-1) 193 #define INVALID_GRANT_REF ((grant_ref_t)-1) 194 #define INVALID_TX_ID ((uint16_t)-1) 195 196 #define TX_ID_TO_TXID(p, id) (&((p)->xnf_tx_pkt_id[(id)])) 197 #define TX_ID_VALID(i) (((i) != INVALID_TX_ID) && ((i) < NET_TX_RING_SIZE)) 198 199 /* Required system entry points */ 200 static int xnf_attach(dev_info_t *, ddi_attach_cmd_t); 201 static int xnf_detach(dev_info_t *, ddi_detach_cmd_t); 202 203 /* Required driver entry points for Nemo */ 204 static int xnf_start(void *); 205 static void xnf_stop(void *); 206 static int xnf_set_mac_addr(void *, const uint8_t *); 207 static int xnf_set_multicast(void *, boolean_t, const uint8_t *); 208 static int xnf_set_promiscuous(void *, boolean_t); 209 static mblk_t *xnf_send(void *, mblk_t *); 210 static uint_t xnf_intr(caddr_t); 211 static int xnf_stat(void *, uint_t, uint64_t *); 212 static boolean_t xnf_getcapab(void *, mac_capab_t, void *); 213 214 /* Driver private functions */ 215 static int xnf_alloc_dma_resources(xnf_t *); 216 static void xnf_release_dma_resources(xnf_t *); 217 static void xnf_release_mblks(xnf_t *); 218 219 static int xnf_buf_constructor(void *, void *, int); 220 static void xnf_buf_destructor(void *, void *); 221 static xnf_buf_t *xnf_buf_get(xnf_t *, int, boolean_t); 222 #pragma inline(xnf_buf_get) 223 static void xnf_buf_put(xnf_t *, xnf_buf_t *, boolean_t); 224 #pragma inline(xnf_buf_put) 225 static void xnf_buf_refresh(xnf_buf_t *); 226 #pragma inline(xnf_buf_refresh) 227 static void xnf_buf_recycle(xnf_buf_t *); 228 229 static int xnf_tx_buf_constructor(void *, void *, int); 230 static void xnf_tx_buf_destructor(void *, void *); 231 232 static grant_ref_t gref_get(xnf_t *); 233 #pragma inline(gref_get) 234 static void gref_put(xnf_t *, grant_ref_t); 235 #pragma inline(gref_put) 236 237 static xnf_txid_t *txid_get(xnf_t *); 238 #pragma inline(txid_get) 239 static void txid_put(xnf_t *, xnf_txid_t *); 240 #pragma inline(txid_put) 241 242 void xnf_send_driver_status(int, int); 243 static void xnf_rxbuf_hang(xnf_t *, xnf_buf_t *); 244 static int xnf_tx_clean_ring(xnf_t *); 245 static void oe_state_change(dev_info_t *, ddi_eventcookie_t, 246 void *, void *); 247 static boolean_t xnf_kstat_init(xnf_t *); 248 static void xnf_rx_collect(xnf_t *); 249 250 static mac_callbacks_t xnf_callbacks = { 251 MC_GETCAPAB, 252 xnf_stat, 253 xnf_start, 254 xnf_stop, 255 xnf_set_promiscuous, 256 xnf_set_multicast, 257 xnf_set_mac_addr, 258 xnf_send, 259 NULL, 260 xnf_getcapab 261 }; 262 263 /* DMA attributes for network ring buffer */ 264 static ddi_dma_attr_t ringbuf_dma_attr = { 265 DMA_ATTR_V0, /* version of this structure */ 266 0, /* lowest usable address */ 267 0xffffffffffffffffULL, /* highest usable address */ 268 0x7fffffff, /* maximum DMAable byte count */ 269 MMU_PAGESIZE, /* alignment in bytes */ 270 0x7ff, /* bitmap of burst sizes */ 271 1, /* minimum transfer */ 272 0xffffffffU, /* maximum transfer */ 273 0xffffffffffffffffULL, /* maximum segment length */ 274 1, /* maximum number of segments */ 275 1, /* granularity */ 276 0, /* flags (reserved) */ 277 }; 278 279 /* DMA attributes for transmit and receive data */ 280 static ddi_dma_attr_t buf_dma_attr = { 281 DMA_ATTR_V0, /* version of this structure */ 282 0, /* lowest usable address */ 283 0xffffffffffffffffULL, /* highest usable address */ 284 0x7fffffff, /* maximum DMAable byte count */ 285 MMU_PAGESIZE, /* alignment in bytes */ 286 0x7ff, /* bitmap of burst sizes */ 
287 1, /* minimum transfer */ 288 0xffffffffU, /* maximum transfer */ 289 0xffffffffffffffffULL, /* maximum segment length */ 290 1, /* maximum number of segments */ 291 1, /* granularity */ 292 0, /* flags (reserved) */ 293 }; 294 295 /* DMA access attributes for registers and descriptors */ 296 static ddi_device_acc_attr_t accattr = { 297 DDI_DEVICE_ATTR_V0, 298 DDI_STRUCTURE_LE_ACC, /* This is a little-endian device */ 299 DDI_STRICTORDER_ACC 300 }; 301 302 /* DMA access attributes for data: NOT to be byte swapped. */ 303 static ddi_device_acc_attr_t data_accattr = { 304 DDI_DEVICE_ATTR_V0, 305 DDI_NEVERSWAP_ACC, 306 DDI_STRICTORDER_ACC 307 }; 308 309 DDI_DEFINE_STREAM_OPS(xnf_dev_ops, nulldev, nulldev, xnf_attach, xnf_detach, 310 nodev, NULL, D_MP, NULL, ddi_quiesce_not_supported); 311 312 static struct modldrv xnf_modldrv = { 313 &mod_driverops, 314 "Virtual Ethernet driver", 315 &xnf_dev_ops 316 }; 317 318 static struct modlinkage modlinkage = { 319 MODREV_1, &xnf_modldrv, NULL 320 }; 321 322 int 323 _init(void) 324 { 325 int r; 326 327 mac_init_ops(&xnf_dev_ops, "xnf"); 328 r = mod_install(&modlinkage); 329 if (r != DDI_SUCCESS) 330 mac_fini_ops(&xnf_dev_ops); 331 332 return (r); 333 } 334 335 int 336 _fini(void) 337 { 338 return (EBUSY); /* XXPV should be removable */ 339 } 340 341 int 342 _info(struct modinfo *modinfop) 343 { 344 return (mod_info(&modlinkage, modinfop)); 345 } 346 347 /* 348 * Acquire a grant reference. 349 */ 350 static grant_ref_t 351 gref_get(xnf_t *xnfp) 352 { 353 grant_ref_t gref; 354 355 mutex_enter(&xnfp->xnf_gref_lock); 356 357 do { 358 gref = gnttab_claim_grant_reference(&xnfp->xnf_gref_head); 359 360 } while ((gref == INVALID_GRANT_REF) && 361 (gnttab_alloc_grant_references(16, &xnfp->xnf_gref_head) == 0)); 362 363 mutex_exit(&xnfp->xnf_gref_lock); 364 365 if (gref == INVALID_GRANT_REF) { 366 xnfp->xnf_stat_gref_failure++; 367 } else { 368 atomic_add_64(&xnfp->xnf_stat_gref_outstanding, 1); 369 if (xnfp->xnf_stat_gref_outstanding > xnfp->xnf_stat_gref_peak) 370 xnfp->xnf_stat_gref_peak = 371 xnfp->xnf_stat_gref_outstanding; 372 } 373 374 return (gref); 375 } 376 377 /* 378 * Release a grant reference. 379 */ 380 static void 381 gref_put(xnf_t *xnfp, grant_ref_t gref) 382 { 383 ASSERT(gref != INVALID_GRANT_REF); 384 385 mutex_enter(&xnfp->xnf_gref_lock); 386 gnttab_release_grant_reference(&xnfp->xnf_gref_head, gref); 387 mutex_exit(&xnfp->xnf_gref_lock); 388 389 atomic_add_64(&xnfp->xnf_stat_gref_outstanding, -1); 390 } 391 392 /* 393 * Acquire a transmit id. 394 */ 395 static xnf_txid_t * 396 txid_get(xnf_t *xnfp) 397 { 398 xnf_txid_t *tidp; 399 400 ASSERT(MUTEX_HELD(&xnfp->xnf_txlock)); 401 402 if (xnfp->xnf_tx_pkt_id_head == INVALID_TX_ID) 403 return (NULL); 404 405 ASSERT(TX_ID_VALID(xnfp->xnf_tx_pkt_id_head)); 406 407 tidp = TX_ID_TO_TXID(xnfp, xnfp->xnf_tx_pkt_id_head); 408 xnfp->xnf_tx_pkt_id_head = tidp->next; 409 tidp->next = INVALID_TX_ID; 410 411 ASSERT(tidp->txbuf == NULL); 412 413 return (tidp); 414 } 415 416 /* 417 * Release a transmit id. 418 */ 419 static void 420 txid_put(xnf_t *xnfp, xnf_txid_t *tidp) 421 { 422 ASSERT(MUTEX_HELD(&xnfp->xnf_txlock)); 423 ASSERT(TX_ID_VALID(tidp->id)); 424 ASSERT(tidp->next == INVALID_TX_ID); 425 426 tidp->txbuf = NULL; 427 tidp->next = xnfp->xnf_tx_pkt_id_head; 428 xnfp->xnf_tx_pkt_id_head = tidp->id; 429 } 430 431 /* 432 * Get `wanted' slots in the transmit ring, waiting for at least that 433 * number if `wait' is B_TRUE. Force the ring to be cleaned by setting 434 * `wanted' to zero. 
435 * 436 * Return the number of slots available. 437 */ 438 static int 439 tx_slots_get(xnf_t *xnfp, int wanted, boolean_t wait) 440 { 441 int slotsfree; 442 boolean_t forced_clean = (wanted == 0); 443 444 ASSERT(MUTEX_HELD(&xnfp->xnf_txlock)); 445 446 /* LINTED: constant in conditional context */ 447 while (B_TRUE) { 448 slotsfree = RING_FREE_REQUESTS(&xnfp->xnf_tx_ring); 449 450 if ((slotsfree < wanted) || forced_clean) 451 slotsfree = xnf_tx_clean_ring(xnfp); 452 453 /* 454 * If there are more than we need free, tell other 455 * people to come looking again. We hold txlock, so we 456 * are able to take our slots before anyone else runs. 457 */ 458 if (slotsfree > wanted) 459 cv_broadcast(&xnfp->xnf_cv_tx_slots); 460 461 if (slotsfree >= wanted) 462 break; 463 464 if (!wait) 465 break; 466 467 cv_wait(&xnfp->xnf_cv_tx_slots, &xnfp->xnf_txlock); 468 } 469 470 ASSERT(slotsfree <= RING_SIZE(&(xnfp->xnf_tx_ring))); 471 472 return (slotsfree); 473 } 474 475 static int 476 xnf_setup_rings(xnf_t *xnfp) 477 { 478 domid_t oeid; 479 struct xenbus_device *xsd; 480 RING_IDX i; 481 int err; 482 xnf_txid_t *tidp; 483 xnf_buf_t **bdescp; 484 485 oeid = xvdi_get_oeid(xnfp->xnf_devinfo); 486 xsd = xvdi_get_xsd(xnfp->xnf_devinfo); 487 488 if (xnfp->xnf_tx_ring_ref != INVALID_GRANT_REF) 489 gnttab_end_foreign_access(xnfp->xnf_tx_ring_ref, 0, 0); 490 491 err = gnttab_grant_foreign_access(oeid, 492 xnf_btop(pa_to_ma(xnfp->xnf_tx_ring_phys_addr)), 0); 493 if (err <= 0) { 494 err = -err; 495 xenbus_dev_error(xsd, err, "granting access to tx ring page"); 496 goto out; 497 } 498 xnfp->xnf_tx_ring_ref = (grant_ref_t)err; 499 500 if (xnfp->xnf_rx_ring_ref != INVALID_GRANT_REF) 501 gnttab_end_foreign_access(xnfp->xnf_rx_ring_ref, 0, 0); 502 503 err = gnttab_grant_foreign_access(oeid, 504 xnf_btop(pa_to_ma(xnfp->xnf_rx_ring_phys_addr)), 0); 505 if (err <= 0) { 506 err = -err; 507 xenbus_dev_error(xsd, err, "granting access to rx ring page"); 508 goto out; 509 } 510 xnfp->xnf_rx_ring_ref = (grant_ref_t)err; 511 512 mutex_enter(&xnfp->xnf_txlock); 513 514 /* 515 * Setup/cleanup the TX ring. Note that this can lose packets 516 * after a resume, but we expect to stagger on. 517 */ 518 xnfp->xnf_tx_pkt_id_head = INVALID_TX_ID; /* I.e. emtpy list. */ 519 for (i = 0, tidp = &xnfp->xnf_tx_pkt_id[0]; 520 i < NET_TX_RING_SIZE; 521 i++, tidp++) { 522 xnf_txbuf_t *txp; 523 524 tidp->id = i; 525 526 txp = tidp->txbuf; 527 if (txp == NULL) { 528 tidp->next = INVALID_TX_ID; /* Appease txid_put(). */ 529 txid_put(xnfp, tidp); 530 continue; 531 } 532 533 ASSERT(txp->tx_txreq.gref != INVALID_GRANT_REF); 534 ASSERT(txp->tx_mp != NULL); 535 536 switch (txp->tx_type) { 537 case TX_DATA: 538 VERIFY(gnttab_query_foreign_access(txp->tx_txreq.gref) 539 == 0); 540 541 if (txp->tx_bdesc == NULL) { 542 (void) gnttab_end_foreign_access_ref( 543 txp->tx_txreq.gref, 1); 544 gref_put(xnfp, txp->tx_txreq.gref); 545 (void) ddi_dma_unbind_handle( 546 txp->tx_dma_handle); 547 } else { 548 xnf_buf_put(xnfp, txp->tx_bdesc, B_TRUE); 549 } 550 551 freemsg(txp->tx_mp); 552 txid_put(xnfp, tidp); 553 kmem_cache_free(xnfp->xnf_tx_buf_cache, txp); 554 555 break; 556 557 case TX_MCAST_REQ: 558 txp->tx_type = TX_MCAST_RSP; 559 txp->tx_status = NETIF_RSP_DROPPED; 560 cv_broadcast(&xnfp->xnf_cv_multicast); 561 562 /* 563 * The request consumed two slots in the ring, 564 * yet only a single xnf_txid_t is used. Step 565 * over the empty slot. 
566 */ 567 i++; 568 ASSERT(i < NET_TX_RING_SIZE); 569 570 break; 571 572 case TX_MCAST_RSP: 573 break; 574 } 575 } 576 577 /* LINTED: constant in conditional context */ 578 SHARED_RING_INIT(xnfp->xnf_tx_ring.sring); 579 /* LINTED: constant in conditional context */ 580 FRONT_RING_INIT(&xnfp->xnf_tx_ring, 581 xnfp->xnf_tx_ring.sring, PAGESIZE); 582 583 mutex_exit(&xnfp->xnf_txlock); 584 585 mutex_enter(&xnfp->xnf_rxlock); 586 587 /* 588 * Clean out any buffers currently posted to the receive ring 589 * before we reset it. 590 */ 591 for (i = 0, bdescp = &xnfp->xnf_rx_pkt_info[0]; 592 i < NET_RX_RING_SIZE; 593 i++, bdescp++) { 594 if (*bdescp != NULL) { 595 xnf_buf_put(xnfp, *bdescp, B_FALSE); 596 *bdescp = NULL; 597 } 598 } 599 600 /* LINTED: constant in conditional context */ 601 SHARED_RING_INIT(xnfp->xnf_rx_ring.sring); 602 /* LINTED: constant in conditional context */ 603 FRONT_RING_INIT(&xnfp->xnf_rx_ring, 604 xnfp->xnf_rx_ring.sring, PAGESIZE); 605 606 /* 607 * Fill the ring with buffers. 608 */ 609 for (i = 0; i < NET_RX_RING_SIZE; i++) { 610 xnf_buf_t *bdesc; 611 612 bdesc = xnf_buf_get(xnfp, KM_SLEEP, B_FALSE); 613 VERIFY(bdesc != NULL); 614 xnf_rxbuf_hang(xnfp, bdesc); 615 } 616 617 /* LINTED: constant in conditional context */ 618 RING_PUSH_REQUESTS(&xnfp->xnf_rx_ring); 619 620 mutex_exit(&xnfp->xnf_rxlock); 621 622 return (0); 623 624 out: 625 if (xnfp->xnf_tx_ring_ref != INVALID_GRANT_REF) 626 gnttab_end_foreign_access(xnfp->xnf_tx_ring_ref, 0, 0); 627 xnfp->xnf_tx_ring_ref = INVALID_GRANT_REF; 628 629 if (xnfp->xnf_rx_ring_ref != INVALID_GRANT_REF) 630 gnttab_end_foreign_access(xnfp->xnf_rx_ring_ref, 0, 0); 631 xnfp->xnf_rx_ring_ref = INVALID_GRANT_REF; 632 633 return (err); 634 } 635 636 /* 637 * Connect driver to back end, called to set up communication with 638 * back end driver both initially and on resume after restore/migrate. 
639 */ 640 void 641 xnf_be_connect(xnf_t *xnfp) 642 { 643 const char *message; 644 xenbus_transaction_t xbt; 645 struct xenbus_device *xsd; 646 char *xsname; 647 int err; 648 649 ASSERT(!xnfp->xnf_connected); 650 651 xsd = xvdi_get_xsd(xnfp->xnf_devinfo); 652 xsname = xvdi_get_xsname(xnfp->xnf_devinfo); 653 654 err = xnf_setup_rings(xnfp); 655 if (err != 0) { 656 cmn_err(CE_WARN, "failed to set up tx/rx rings"); 657 xenbus_dev_error(xsd, err, "setting up ring"); 658 return; 659 } 660 661 again: 662 err = xenbus_transaction_start(&xbt); 663 if (err != 0) { 664 xenbus_dev_error(xsd, EIO, "starting transaction"); 665 return; 666 } 667 668 err = xenbus_printf(xbt, xsname, "tx-ring-ref", "%u", 669 xnfp->xnf_tx_ring_ref); 670 if (err != 0) { 671 message = "writing tx ring-ref"; 672 goto abort_transaction; 673 } 674 675 err = xenbus_printf(xbt, xsname, "rx-ring-ref", "%u", 676 xnfp->xnf_rx_ring_ref); 677 if (err != 0) { 678 message = "writing rx ring-ref"; 679 goto abort_transaction; 680 } 681 682 err = xenbus_printf(xbt, xsname, "event-channel", "%u", 683 xnfp->xnf_evtchn); 684 if (err != 0) { 685 message = "writing event-channel"; 686 goto abort_transaction; 687 } 688 689 err = xenbus_printf(xbt, xsname, "feature-rx-notify", "%d", 1); 690 if (err != 0) { 691 message = "writing feature-rx-notify"; 692 goto abort_transaction; 693 } 694 695 err = xenbus_printf(xbt, xsname, "request-rx-copy", "%d", 1); 696 if (err != 0) { 697 message = "writing request-rx-copy"; 698 goto abort_transaction; 699 } 700 701 if (xnfp->xnf_be_mcast_control) { 702 err = xenbus_printf(xbt, xsname, "request-multicast-control", 703 "%d", 1); 704 if (err != 0) { 705 message = "writing request-multicast-control"; 706 goto abort_transaction; 707 } 708 } 709 710 err = xvdi_switch_state(xnfp->xnf_devinfo, xbt, XenbusStateConnected); 711 if (err != 0) { 712 message = "switching state to XenbusStateConnected"; 713 goto abort_transaction; 714 } 715 716 err = xenbus_transaction_end(xbt, 0); 717 if (err != 0) { 718 if (err == EAGAIN) 719 goto again; 720 xenbus_dev_error(xsd, err, "completing transaction"); 721 } 722 723 return; 724 725 abort_transaction: 726 (void) xenbus_transaction_end(xbt, 1); 727 xenbus_dev_error(xsd, err, "%s", message); 728 } 729 730 /* 731 * Read configuration information from xenstore. 732 */ 733 void 734 xnf_read_config(xnf_t *xnfp) 735 { 736 int err, be_cap; 737 char mac[ETHERADDRL * 3]; 738 char *oename = xvdi_get_oename(xnfp->xnf_devinfo); 739 740 err = xenbus_scanf(XBT_NULL, oename, "mac", 741 "%s", (char *)&mac[0]); 742 if (err != 0) { 743 /* 744 * bad: we're supposed to be set up with a proper mac 745 * addr. at this point 746 */ 747 cmn_err(CE_WARN, "%s%d: no mac address", 748 ddi_driver_name(xnfp->xnf_devinfo), 749 ddi_get_instance(xnfp->xnf_devinfo)); 750 return; 751 } 752 if (ether_aton(mac, xnfp->xnf_mac_addr) != ETHERADDRL) { 753 err = ENOENT; 754 xenbus_dev_error(xvdi_get_xsd(xnfp->xnf_devinfo), ENOENT, 755 "parsing %s/mac", xvdi_get_xsname(xnfp->xnf_devinfo)); 756 return; 757 } 758 759 err = xenbus_scanf(XBT_NULL, oename, 760 "feature-rx-copy", "%d", &be_cap); 761 /* 762 * If we fail to read the store we assume that the key is 763 * absent, implying an older domain at the far end. Older 764 * domains cannot do HV copy. 
765 */ 766 if (err != 0) 767 be_cap = 0; 768 xnfp->xnf_be_rx_copy = (be_cap != 0); 769 770 err = xenbus_scanf(XBT_NULL, oename, 771 "feature-multicast-control", "%d", &be_cap); 772 /* 773 * If we fail to read the store we assume that the key is 774 * absent, implying an older domain at the far end. Older 775 * domains do not support multicast control. 776 */ 777 if (err != 0) 778 be_cap = 0; 779 xnfp->xnf_be_mcast_control = (be_cap != 0) && xnf_multicast_control; 780 } 781 782 /* 783 * attach(9E) -- Attach a device to the system 784 */ 785 static int 786 xnf_attach(dev_info_t *devinfo, ddi_attach_cmd_t cmd) 787 { 788 mac_register_t *macp; 789 xnf_t *xnfp; 790 int err; 791 char cachename[32]; 792 793 #ifdef XNF_DEBUG 794 if (xnf_debug & XNF_DEBUG_DDI) 795 printf("xnf%d: attach(0x%p)\n", ddi_get_instance(devinfo), 796 (void *)devinfo); 797 #endif 798 799 switch (cmd) { 800 case DDI_RESUME: 801 xnfp = ddi_get_driver_private(devinfo); 802 xnfp->xnf_gen++; 803 804 (void) xvdi_resume(devinfo); 805 (void) xvdi_alloc_evtchn(devinfo); 806 xnfp->xnf_evtchn = xvdi_get_evtchn(devinfo); 807 #ifdef XPV_HVM_DRIVER 808 ec_bind_evtchn_to_handler(xnfp->xnf_evtchn, IPL_VIF, xnf_intr, 809 xnfp); 810 #else 811 (void) ddi_add_intr(devinfo, 0, NULL, NULL, xnf_intr, 812 (caddr_t)xnfp); 813 #endif 814 return (DDI_SUCCESS); 815 816 case DDI_ATTACH: 817 break; 818 819 default: 820 return (DDI_FAILURE); 821 } 822 823 /* 824 * Allocate gld_mac_info_t and xnf_instance structures 825 */ 826 macp = mac_alloc(MAC_VERSION); 827 if (macp == NULL) 828 return (DDI_FAILURE); 829 xnfp = kmem_zalloc(sizeof (*xnfp), KM_SLEEP); 830 831 macp->m_dip = devinfo; 832 macp->m_driver = xnfp; 833 xnfp->xnf_devinfo = devinfo; 834 835 macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER; 836 macp->m_src_addr = xnfp->xnf_mac_addr; 837 macp->m_callbacks = &xnf_callbacks; 838 macp->m_min_sdu = 0; 839 macp->m_max_sdu = XNF_MAXPKT; 840 841 xnfp->xnf_running = B_FALSE; 842 xnfp->xnf_connected = B_FALSE; 843 xnfp->xnf_be_rx_copy = B_FALSE; 844 xnfp->xnf_be_mcast_control = B_FALSE; 845 xnfp->xnf_need_sched = B_FALSE; 846 847 xnfp->xnf_rx_head = NULL; 848 xnfp->xnf_rx_tail = NULL; 849 xnfp->xnf_rx_new_buffers_posted = B_FALSE; 850 851 #ifdef XPV_HVM_DRIVER 852 /* 853 * Report our version to dom0. 854 */ 855 if (xenbus_printf(XBT_NULL, "guest/xnf", "version", "%d", 856 HVMPV_XNF_VERS)) 857 cmn_err(CE_WARN, "xnf: couldn't write version\n"); 858 #endif 859 860 /* 861 * Get the iblock cookie with which to initialize the mutexes. 
862 */ 863 if (ddi_get_iblock_cookie(devinfo, 0, &xnfp->xnf_icookie) 864 != DDI_SUCCESS) 865 goto failure; 866 867 mutex_init(&xnfp->xnf_txlock, 868 NULL, MUTEX_DRIVER, xnfp->xnf_icookie); 869 mutex_init(&xnfp->xnf_rxlock, 870 NULL, MUTEX_DRIVER, xnfp->xnf_icookie); 871 mutex_init(&xnfp->xnf_schedlock, 872 NULL, MUTEX_DRIVER, xnfp->xnf_icookie); 873 mutex_init(&xnfp->xnf_gref_lock, 874 NULL, MUTEX_DRIVER, xnfp->xnf_icookie); 875 876 cv_init(&xnfp->xnf_cv_state, NULL, CV_DEFAULT, NULL); 877 cv_init(&xnfp->xnf_cv_multicast, NULL, CV_DEFAULT, NULL); 878 cv_init(&xnfp->xnf_cv_tx_slots, NULL, CV_DEFAULT, NULL); 879 880 (void) sprintf(cachename, "xnf_buf_cache_%d", 881 ddi_get_instance(devinfo)); 882 xnfp->xnf_buf_cache = kmem_cache_create(cachename, 883 sizeof (xnf_buf_t), 0, 884 xnf_buf_constructor, xnf_buf_destructor, 885 NULL, xnfp, NULL, 0); 886 if (xnfp->xnf_buf_cache == NULL) 887 goto failure_0; 888 889 (void) sprintf(cachename, "xnf_tx_buf_cache_%d", 890 ddi_get_instance(devinfo)); 891 xnfp->xnf_tx_buf_cache = kmem_cache_create(cachename, 892 sizeof (xnf_txbuf_t), 0, 893 xnf_tx_buf_constructor, xnf_tx_buf_destructor, 894 NULL, xnfp, NULL, 0); 895 if (xnfp->xnf_tx_buf_cache == NULL) 896 goto failure_1; 897 898 xnfp->xnf_gref_head = INVALID_GRANT_REF; 899 900 if (xnf_alloc_dma_resources(xnfp) == DDI_FAILURE) { 901 cmn_err(CE_WARN, "xnf%d: failed to allocate and initialize " 902 "driver data structures", 903 ddi_get_instance(xnfp->xnf_devinfo)); 904 goto failure_2; 905 } 906 907 xnfp->xnf_rx_ring.sring->rsp_event = 908 xnfp->xnf_tx_ring.sring->rsp_event = 1; 909 910 xnfp->xnf_tx_ring_ref = INVALID_GRANT_REF; 911 xnfp->xnf_rx_ring_ref = INVALID_GRANT_REF; 912 913 /* set driver private pointer now */ 914 ddi_set_driver_private(devinfo, xnfp); 915 916 if (!xnf_kstat_init(xnfp)) 917 goto failure_3; 918 919 /* 920 * Allocate an event channel, add the interrupt handler and 921 * bind it to the event channel. 922 */ 923 (void) xvdi_alloc_evtchn(devinfo); 924 xnfp->xnf_evtchn = xvdi_get_evtchn(devinfo); 925 #ifdef XPV_HVM_DRIVER 926 ec_bind_evtchn_to_handler(xnfp->xnf_evtchn, IPL_VIF, xnf_intr, xnfp); 927 #else 928 (void) ddi_add_intr(devinfo, 0, NULL, NULL, xnf_intr, (caddr_t)xnfp); 929 #endif 930 931 err = mac_register(macp, &xnfp->xnf_mh); 932 mac_free(macp); 933 macp = NULL; 934 if (err != 0) 935 goto failure_4; 936 937 if (xvdi_add_event_handler(devinfo, XS_OE_STATE, oe_state_change, NULL) 938 != DDI_SUCCESS) 939 goto failure_5; 940 941 #ifdef XPV_HVM_DRIVER 942 /* 943 * In the HVM case, this driver essentially replaces a driver for 944 * a 'real' PCI NIC. Without the "model" property set to 945 * "Ethernet controller", like the PCI code does, netbooting does 946 * not work correctly, as strplumb_get_netdev_path() will not find 947 * this interface. 
948 */ 949 (void) ndi_prop_update_string(DDI_DEV_T_NONE, devinfo, "model", 950 "Ethernet controller"); 951 #endif 952 953 #ifdef XNF_DEBUG 954 if (xnf_debug_instance == NULL) 955 xnf_debug_instance = xnfp; 956 #endif 957 958 return (DDI_SUCCESS); 959 960 failure_5: 961 (void) mac_unregister(xnfp->xnf_mh); 962 963 failure_4: 964 #ifdef XPV_HVM_DRIVER 965 ec_unbind_evtchn(xnfp->xnf_evtchn); 966 xvdi_free_evtchn(devinfo); 967 #else 968 ddi_remove_intr(devinfo, 0, xnfp->xnf_icookie); 969 #endif 970 xnfp->xnf_evtchn = INVALID_EVTCHN; 971 kstat_delete(xnfp->xnf_kstat_aux); 972 973 failure_3: 974 xnf_release_dma_resources(xnfp); 975 976 failure_2: 977 kmem_cache_destroy(xnfp->xnf_tx_buf_cache); 978 979 failure_1: 980 kmem_cache_destroy(xnfp->xnf_buf_cache); 981 982 failure_0: 983 cv_destroy(&xnfp->xnf_cv_tx_slots); 984 cv_destroy(&xnfp->xnf_cv_multicast); 985 cv_destroy(&xnfp->xnf_cv_state); 986 987 mutex_destroy(&xnfp->xnf_gref_lock); 988 mutex_destroy(&xnfp->xnf_schedlock); 989 mutex_destroy(&xnfp->xnf_rxlock); 990 mutex_destroy(&xnfp->xnf_txlock); 991 992 failure: 993 kmem_free(xnfp, sizeof (*xnfp)); 994 if (macp != NULL) 995 mac_free(macp); 996 997 return (DDI_FAILURE); 998 } 999 1000 /* detach(9E) -- Detach a device from the system */ 1001 static int 1002 xnf_detach(dev_info_t *devinfo, ddi_detach_cmd_t cmd) 1003 { 1004 xnf_t *xnfp; /* Our private device info */ 1005 1006 #ifdef XNF_DEBUG 1007 if (xnf_debug & XNF_DEBUG_DDI) 1008 printf("xnf_detach(0x%p)\n", (void *)devinfo); 1009 #endif 1010 1011 xnfp = ddi_get_driver_private(devinfo); 1012 1013 switch (cmd) { 1014 case DDI_SUSPEND: 1015 #ifdef XPV_HVM_DRIVER 1016 ec_unbind_evtchn(xnfp->xnf_evtchn); 1017 xvdi_free_evtchn(devinfo); 1018 #else 1019 ddi_remove_intr(devinfo, 0, xnfp->xnf_icookie); 1020 #endif 1021 1022 xvdi_suspend(devinfo); 1023 1024 mutex_enter(&xnfp->xnf_rxlock); 1025 mutex_enter(&xnfp->xnf_txlock); 1026 1027 xnfp->xnf_evtchn = INVALID_EVTCHN; 1028 xnfp->xnf_connected = B_FALSE; 1029 mutex_exit(&xnfp->xnf_txlock); 1030 mutex_exit(&xnfp->xnf_rxlock); 1031 1032 /* claim link to be down after disconnect */ 1033 mac_link_update(xnfp->xnf_mh, LINK_STATE_DOWN); 1034 return (DDI_SUCCESS); 1035 1036 case DDI_DETACH: 1037 break; 1038 1039 default: 1040 return (DDI_FAILURE); 1041 } 1042 1043 if (xnfp->xnf_connected) 1044 return (DDI_FAILURE); 1045 1046 /* 1047 * Cannot detach if we have xnf_buf_t outstanding. 
1048 */ 1049 if (xnfp->xnf_stat_buf_allocated > 0) 1050 return (DDI_FAILURE); 1051 1052 if (mac_unregister(xnfp->xnf_mh) != 0) 1053 return (DDI_FAILURE); 1054 1055 kstat_delete(xnfp->xnf_kstat_aux); 1056 1057 /* Stop the receiver */ 1058 xnf_stop(xnfp); 1059 1060 xvdi_remove_event_handler(devinfo, XS_OE_STATE); 1061 1062 /* Remove the interrupt */ 1063 #ifdef XPV_HVM_DRIVER 1064 ec_unbind_evtchn(xnfp->xnf_evtchn); 1065 xvdi_free_evtchn(devinfo); 1066 #else 1067 ddi_remove_intr(devinfo, 0, xnfp->xnf_icookie); 1068 #endif 1069 1070 /* Release any pending xmit mblks */ 1071 xnf_release_mblks(xnfp); 1072 1073 /* Release all DMA resources */ 1074 xnf_release_dma_resources(xnfp); 1075 1076 cv_destroy(&xnfp->xnf_cv_tx_slots); 1077 cv_destroy(&xnfp->xnf_cv_multicast); 1078 cv_destroy(&xnfp->xnf_cv_state); 1079 1080 kmem_cache_destroy(xnfp->xnf_tx_buf_cache); 1081 kmem_cache_destroy(xnfp->xnf_buf_cache); 1082 1083 mutex_destroy(&xnfp->xnf_gref_lock); 1084 mutex_destroy(&xnfp->xnf_schedlock); 1085 mutex_destroy(&xnfp->xnf_rxlock); 1086 mutex_destroy(&xnfp->xnf_txlock); 1087 1088 kmem_free(xnfp, sizeof (*xnfp)); 1089 1090 return (DDI_SUCCESS); 1091 } 1092 1093 /* 1094 * xnf_set_mac_addr() -- set the physical network address on the board. 1095 */ 1096 static int 1097 xnf_set_mac_addr(void *arg, const uint8_t *macaddr) 1098 { 1099 _NOTE(ARGUNUSED(arg, macaddr)); 1100 1101 /* 1102 * We can't set our macaddr. 1103 */ 1104 return (ENOTSUP); 1105 } 1106 1107 /* 1108 * xnf_set_multicast() -- set (enable) or disable a multicast address. 1109 * 1110 * Program the hardware to enable/disable the multicast address 1111 * in "mca". Enable if "add" is true, disable if false. 1112 */ 1113 static int 1114 xnf_set_multicast(void *arg, boolean_t add, const uint8_t *mca) 1115 { 1116 xnf_t *xnfp = arg; 1117 xnf_txbuf_t *txp; 1118 int n_slots; 1119 RING_IDX slot; 1120 xnf_txid_t *tidp; 1121 netif_tx_request_t *txrp; 1122 struct netif_extra_info *erp; 1123 boolean_t notify, result; 1124 1125 /* 1126 * If the backend does not support multicast control then we 1127 * must assume that the right packets will just arrive. 1128 */ 1129 if (!xnfp->xnf_be_mcast_control) 1130 return (0); 1131 1132 txp = kmem_cache_alloc(xnfp->xnf_tx_buf_cache, KM_SLEEP); 1133 1134 mutex_enter(&xnfp->xnf_txlock); 1135 1136 /* 1137 * If we're not yet connected then claim success. This is 1138 * acceptable because we refresh the entire set of multicast 1139 * addresses when we get connected. 1140 * 1141 * We can't wait around here because the MAC layer expects 1142 * this to be a non-blocking operation - waiting ends up 1143 * causing a deadlock during resume. 1144 */ 1145 if (!xnfp->xnf_connected) { 1146 mutex_exit(&xnfp->xnf_txlock); 1147 return (0); 1148 } 1149 1150 /* 1151 * 1. Acquire two slots in the ring. 1152 * 2. Fill in the slots. 1153 * 3. Request notification when the operation is done. 1154 * 4. Kick the peer. 1155 * 5. Wait for the response via xnf_tx_clean_ring(). 1156 */ 1157 1158 n_slots = tx_slots_get(xnfp, 2, B_TRUE); 1159 ASSERT(n_slots >= 2); 1160 1161 slot = xnfp->xnf_tx_ring.req_prod_pvt; 1162 tidp = txid_get(xnfp); 1163 VERIFY(tidp != NULL); 1164 1165 txp->tx_type = TX_MCAST_REQ; 1166 txp->tx_slot = slot; 1167 1168 txrp = RING_GET_REQUEST(&xnfp->xnf_tx_ring, slot); 1169 erp = (struct netif_extra_info *) 1170 RING_GET_REQUEST(&xnfp->xnf_tx_ring, slot + 1); 1171 1172 txrp->gref = 0; 1173 txrp->size = 0; 1174 txrp->offset = 0; 1175 /* Set tx_txreq.id to appease xnf_tx_clean_ring(). 
*/ 1176 txrp->id = txp->tx_txreq.id = tidp->id; 1177 txrp->flags = NETTXF_extra_info; 1178 1179 erp->type = add ? XEN_NETIF_EXTRA_TYPE_MCAST_ADD : 1180 XEN_NETIF_EXTRA_TYPE_MCAST_DEL; 1181 bcopy((void *)mca, &erp->u.mcast.addr, ETHERADDRL); 1182 1183 tidp->txbuf = txp; 1184 1185 xnfp->xnf_tx_ring.req_prod_pvt = slot + 2; 1186 1187 mutex_enter(&xnfp->xnf_schedlock); 1188 xnfp->xnf_pending_multicast++; 1189 mutex_exit(&xnfp->xnf_schedlock); 1190 1191 /* LINTED: constant in conditional context */ 1192 RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&xnfp->xnf_tx_ring, 1193 notify); 1194 if (notify) 1195 ec_notify_via_evtchn(xnfp->xnf_evtchn); 1196 1197 while (txp->tx_type == TX_MCAST_REQ) 1198 cv_wait(&xnfp->xnf_cv_multicast, 1199 &xnfp->xnf_txlock); 1200 1201 ASSERT(txp->tx_type == TX_MCAST_RSP); 1202 1203 mutex_enter(&xnfp->xnf_schedlock); 1204 xnfp->xnf_pending_multicast--; 1205 mutex_exit(&xnfp->xnf_schedlock); 1206 1207 result = (txp->tx_status == NETIF_RSP_OKAY); 1208 1209 txid_put(xnfp, tidp); 1210 1211 mutex_exit(&xnfp->xnf_txlock); 1212 1213 kmem_cache_free(xnfp->xnf_tx_buf_cache, txp); 1214 1215 return (result ? 0 : 1); 1216 } 1217 1218 /* 1219 * xnf_set_promiscuous() -- set or reset promiscuous mode on the board 1220 * 1221 * Program the hardware to enable/disable promiscuous mode. 1222 */ 1223 static int 1224 xnf_set_promiscuous(void *arg, boolean_t on) 1225 { 1226 _NOTE(ARGUNUSED(arg, on)); 1227 1228 /* 1229 * We can't really do this, but we pretend that we can in 1230 * order that snoop will work. 1231 */ 1232 return (0); 1233 } 1234 1235 /* 1236 * Clean buffers that we have responses for from the transmit ring. 1237 */ 1238 static int 1239 xnf_tx_clean_ring(xnf_t *xnfp) 1240 { 1241 boolean_t work_to_do; 1242 1243 ASSERT(MUTEX_HELD(&xnfp->xnf_txlock)); 1244 1245 loop: 1246 while (RING_HAS_UNCONSUMED_RESPONSES(&xnfp->xnf_tx_ring)) { 1247 RING_IDX cons, prod, i; 1248 1249 cons = xnfp->xnf_tx_ring.rsp_cons; 1250 prod = xnfp->xnf_tx_ring.sring->rsp_prod; 1251 membar_consumer(); 1252 /* 1253 * Clean tx requests from ring that we have responses 1254 * for. 
1255 */ 1256 DTRACE_PROBE2(xnf_tx_clean_range, int, cons, int, prod); 1257 for (i = cons; i != prod; i++) { 1258 netif_tx_response_t *trp; 1259 xnf_txid_t *tidp; 1260 xnf_txbuf_t *txp; 1261 1262 trp = RING_GET_RESPONSE(&xnfp->xnf_tx_ring, i); 1263 ASSERT(TX_ID_VALID(trp->id)); 1264 1265 tidp = TX_ID_TO_TXID(xnfp, trp->id); 1266 ASSERT(tidp->id == trp->id); 1267 ASSERT(tidp->next == INVALID_TX_ID); 1268 1269 txp = tidp->txbuf; 1270 ASSERT(txp != NULL); 1271 ASSERT(txp->tx_txreq.id == trp->id); 1272 1273 switch (txp->tx_type) { 1274 case TX_DATA: 1275 if (gnttab_query_foreign_access( 1276 txp->tx_txreq.gref) != 0) 1277 cmn_err(CE_PANIC, 1278 "tx grant %d still in use by " 1279 "backend domain", 1280 txp->tx_txreq.gref); 1281 1282 if (txp->tx_bdesc == NULL) { 1283 (void) gnttab_end_foreign_access_ref( 1284 txp->tx_txreq.gref, 1); 1285 gref_put(xnfp, txp->tx_txreq.gref); 1286 (void) ddi_dma_unbind_handle( 1287 txp->tx_dma_handle); 1288 } else { 1289 xnf_buf_put(xnfp, txp->tx_bdesc, 1290 B_TRUE); 1291 } 1292 1293 freemsg(txp->tx_mp); 1294 txid_put(xnfp, tidp); 1295 kmem_cache_free(xnfp->xnf_tx_buf_cache, txp); 1296 1297 break; 1298 1299 case TX_MCAST_REQ: 1300 txp->tx_type = TX_MCAST_RSP; 1301 txp->tx_status = trp->status; 1302 cv_broadcast(&xnfp->xnf_cv_multicast); 1303 1304 break; 1305 1306 case TX_MCAST_RSP: 1307 break; 1308 1309 default: 1310 cmn_err(CE_PANIC, "xnf_tx_clean_ring: " 1311 "invalid xnf_txbuf_t type: %d", 1312 txp->tx_type); 1313 break; 1314 } 1315 } 1316 /* 1317 * Record the last response we dealt with so that we 1318 * know where to start next time around. 1319 */ 1320 xnfp->xnf_tx_ring.rsp_cons = prod; 1321 membar_enter(); 1322 } 1323 1324 /* LINTED: constant in conditional context */ 1325 RING_FINAL_CHECK_FOR_RESPONSES(&xnfp->xnf_tx_ring, work_to_do); 1326 if (work_to_do) 1327 goto loop; 1328 1329 return (RING_FREE_REQUESTS(&xnfp->xnf_tx_ring)); 1330 } 1331 1332 /* 1333 * Allocate and fill in a look-aside buffer for the packet `mp'. Used 1334 * to ensure that the packet is physically contiguous and contained 1335 * within a single page. 1336 */ 1337 static xnf_buf_t * 1338 xnf_tx_pullup(xnf_t *xnfp, mblk_t *mp) 1339 { 1340 xnf_buf_t *bd; 1341 caddr_t bp; 1342 1343 bd = xnf_buf_get(xnfp, KM_SLEEP, B_TRUE); 1344 if (bd == NULL) 1345 return (NULL); 1346 1347 bp = bd->buf; 1348 while (mp != NULL) { 1349 size_t len = MBLKL(mp); 1350 1351 bcopy(mp->b_rptr, bp, len); 1352 bp += len; 1353 1354 mp = mp->b_cont; 1355 } 1356 1357 ASSERT((bp - bd->buf) <= PAGESIZE); 1358 1359 xnfp->xnf_stat_tx_pullup++; 1360 1361 return (bd); 1362 } 1363 1364 /* 1365 * Insert the pseudo-header checksum into the packet `buf'. 1366 */ 1367 void 1368 xnf_pseudo_cksum(caddr_t buf, int length) 1369 { 1370 struct ether_header *ehp; 1371 uint16_t sap, len, *stuff; 1372 uint32_t cksum; 1373 size_t offset; 1374 ipha_t *ipha; 1375 ipaddr_t src, dst; 1376 1377 ASSERT(length >= sizeof (*ehp)); 1378 ehp = (struct ether_header *)buf; 1379 1380 if (ntohs(ehp->ether_type) == VLAN_TPID) { 1381 struct ether_vlan_header *evhp; 1382 1383 ASSERT(length >= sizeof (*evhp)); 1384 evhp = (struct ether_vlan_header *)buf; 1385 sap = ntohs(evhp->ether_type); 1386 offset = sizeof (*evhp); 1387 } else { 1388 sap = ntohs(ehp->ether_type); 1389 offset = sizeof (*ehp); 1390 } 1391 1392 ASSERT(sap == ETHERTYPE_IP); 1393 1394 /* Packet should have been pulled up by the caller. 
*/ 1395 if ((offset + sizeof (ipha_t)) > length) { 1396 cmn_err(CE_WARN, "xnf_pseudo_cksum: no room for checksum"); 1397 return; 1398 } 1399 1400 ipha = (ipha_t *)(buf + offset); 1401 1402 ASSERT(IPH_HDR_LENGTH(ipha) == IP_SIMPLE_HDR_LENGTH); 1403 1404 len = ntohs(ipha->ipha_length) - IP_SIMPLE_HDR_LENGTH; 1405 1406 switch (ipha->ipha_protocol) { 1407 case IPPROTO_TCP: 1408 stuff = IPH_TCPH_CHECKSUMP(ipha, IP_SIMPLE_HDR_LENGTH); 1409 cksum = IP_TCP_CSUM_COMP; 1410 break; 1411 case IPPROTO_UDP: 1412 stuff = IPH_UDPH_CHECKSUMP(ipha, IP_SIMPLE_HDR_LENGTH); 1413 cksum = IP_UDP_CSUM_COMP; 1414 break; 1415 default: 1416 cmn_err(CE_WARN, "xnf_pseudo_cksum: unexpected protocol %d", 1417 ipha->ipha_protocol); 1418 return; 1419 } 1420 1421 src = ipha->ipha_src; 1422 dst = ipha->ipha_dst; 1423 1424 cksum += (dst >> 16) + (dst & 0xFFFF); 1425 cksum += (src >> 16) + (src & 0xFFFF); 1426 cksum += htons(len); 1427 1428 cksum = (cksum >> 16) + (cksum & 0xFFFF); 1429 cksum = (cksum >> 16) + (cksum & 0xFFFF); 1430 1431 ASSERT(cksum <= 0xFFFF); 1432 1433 *stuff = (uint16_t)(cksum ? cksum : ~cksum); 1434 } 1435 1436 /* 1437 * Push a list of prepared packets (`txp') into the transmit ring. 1438 */ 1439 static xnf_txbuf_t * 1440 tx_push_packets(xnf_t *xnfp, xnf_txbuf_t *txp) 1441 { 1442 int slots_free; 1443 RING_IDX slot; 1444 boolean_t notify; 1445 1446 mutex_enter(&xnfp->xnf_txlock); 1447 1448 ASSERT(xnfp->xnf_running); 1449 1450 /* 1451 * Wait until we are connected to the backend. 1452 */ 1453 while (!xnfp->xnf_connected) 1454 cv_wait(&xnfp->xnf_cv_state, &xnfp->xnf_txlock); 1455 1456 slots_free = tx_slots_get(xnfp, 1, B_FALSE); 1457 DTRACE_PROBE1(xnf_send_slotsfree, int, slots_free); 1458 1459 slot = xnfp->xnf_tx_ring.req_prod_pvt; 1460 1461 while ((txp != NULL) && (slots_free > 0)) { 1462 xnf_txid_t *tidp; 1463 netif_tx_request_t *txrp; 1464 1465 tidp = txid_get(xnfp); 1466 VERIFY(tidp != NULL); 1467 1468 txrp = RING_GET_REQUEST(&xnfp->xnf_tx_ring, slot); 1469 1470 txp->tx_slot = slot; 1471 txp->tx_txreq.id = tidp->id; 1472 *txrp = txp->tx_txreq; 1473 1474 tidp->txbuf = txp; 1475 1476 xnfp->xnf_stat_opackets++; 1477 xnfp->xnf_stat_obytes += txp->tx_txreq.size; 1478 1479 txp = txp->tx_next; 1480 slots_free--; 1481 slot++; 1482 1483 } 1484 1485 xnfp->xnf_tx_ring.req_prod_pvt = slot; 1486 1487 /* 1488 * Tell the peer that we sent something, if it cares. 1489 */ 1490 /* LINTED: constant in conditional context */ 1491 RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&xnfp->xnf_tx_ring, 1492 notify); 1493 if (notify) 1494 ec_notify_via_evtchn(xnfp->xnf_evtchn); 1495 1496 mutex_exit(&xnfp->xnf_txlock); 1497 1498 return (txp); 1499 } 1500 1501 /* 1502 * Send the chain of packets `mp'. Called by the MAC framework. 1503 */ 1504 static mblk_t * 1505 xnf_send(void *arg, mblk_t *mp) 1506 { 1507 xnf_t *xnfp = arg; 1508 domid_t oeid; 1509 xnf_txbuf_t *head, *tail; 1510 mblk_t *ml; 1511 int prepared; 1512 1513 oeid = xvdi_get_oeid(xnfp->xnf_devinfo); 1514 1515 /* 1516 * Prepare packets for transmission. 1517 */ 1518 head = tail = NULL; 1519 prepared = 0; 1520 while (mp != NULL) { 1521 xnf_txbuf_t *txp; 1522 int n_chunks, length; 1523 boolean_t page_oops; 1524 uint32_t pflags; 1525 1526 for (ml = mp, n_chunks = length = 0, page_oops = B_FALSE; 1527 ml != NULL; 1528 ml = ml->b_cont, n_chunks++) { 1529 1530 /* 1531 * Test if this buffer includes a page 1532 * boundary. The test assumes that the range 1533 * b_rptr...b_wptr can include only a single 1534 * boundary. 
1535 */ 1536 if (xnf_btop((size_t)ml->b_rptr) != 1537 xnf_btop((size_t)ml->b_wptr)) { 1538 xnfp->xnf_stat_tx_pagebndry++; 1539 page_oops = B_TRUE; 1540 } 1541 1542 length += MBLKL(ml); 1543 } 1544 DTRACE_PROBE1(xnf_send_b_cont, int, n_chunks); 1545 1546 /* 1547 * Make sure packet isn't too large. 1548 */ 1549 if (length > XNF_FRAMESIZE) { 1550 cmn_err(CE_WARN, 1551 "xnf%d: oversized packet (%d bytes) dropped", 1552 ddi_get_instance(xnfp->xnf_devinfo), length); 1553 freemsg(mp); 1554 continue; 1555 } 1556 1557 txp = kmem_cache_alloc(xnfp->xnf_tx_buf_cache, KM_SLEEP); 1558 1559 txp->tx_type = TX_DATA; 1560 1561 if ((n_chunks > xnf_max_tx_frags) || page_oops) { 1562 /* 1563 * Loan a side buffer rather than the mblk 1564 * itself. 1565 */ 1566 txp->tx_bdesc = xnf_tx_pullup(xnfp, mp); 1567 if (txp->tx_bdesc == NULL) { 1568 kmem_cache_free(xnfp->xnf_tx_buf_cache, txp); 1569 break; 1570 } 1571 1572 txp->tx_bufp = txp->tx_bdesc->buf; 1573 txp->tx_mfn = txp->tx_bdesc->buf_mfn; 1574 txp->tx_txreq.gref = txp->tx_bdesc->grant_ref; 1575 1576 } else { 1577 int rc; 1578 ddi_dma_cookie_t dma_cookie; 1579 uint_t ncookies; 1580 1581 rc = ddi_dma_addr_bind_handle(txp->tx_dma_handle, 1582 NULL, (char *)mp->b_rptr, length, 1583 DDI_DMA_WRITE | DDI_DMA_STREAMING, 1584 DDI_DMA_DONTWAIT, 0, &dma_cookie, 1585 &ncookies); 1586 if (rc != DDI_DMA_MAPPED) { 1587 ASSERT(rc != DDI_DMA_INUSE); 1588 ASSERT(rc != DDI_DMA_PARTIAL_MAP); 1589 1590 #ifdef XNF_DEBUG 1591 if (rc != DDI_DMA_NORESOURCES) 1592 cmn_err(CE_WARN, 1593 "xnf%d: bind_handle failed (%x)", 1594 ddi_get_instance(xnfp->xnf_devinfo), 1595 rc); 1596 #endif 1597 kmem_cache_free(xnfp->xnf_tx_buf_cache, txp); 1598 break; 1599 } 1600 ASSERT(ncookies == 1); 1601 1602 txp->tx_bdesc = NULL; 1603 txp->tx_bufp = (caddr_t)mp->b_rptr; 1604 txp->tx_mfn = 1605 xnf_btop(pa_to_ma(dma_cookie.dmac_laddress)); 1606 txp->tx_txreq.gref = gref_get(xnfp); 1607 if (txp->tx_txreq.gref == INVALID_GRANT_REF) { 1608 (void) ddi_dma_unbind_handle( 1609 txp->tx_dma_handle); 1610 kmem_cache_free(xnfp->xnf_tx_buf_cache, txp); 1611 break; 1612 } 1613 gnttab_grant_foreign_access_ref(txp->tx_txreq.gref, 1614 oeid, txp->tx_mfn, 1); 1615 } 1616 1617 txp->tx_next = NULL; 1618 txp->tx_mp = mp; 1619 txp->tx_txreq.size = length; 1620 txp->tx_txreq.offset = (uintptr_t)txp->tx_bufp & PAGEOFFSET; 1621 txp->tx_txreq.flags = 0; 1622 hcksum_retrieve(mp, NULL, NULL, NULL, NULL, NULL, NULL, 1623 &pflags); 1624 if (pflags != 0) { 1625 /* 1626 * If the local protocol stack requests checksum 1627 * offload we set the 'checksum blank' flag, 1628 * indicating to the peer that we need the checksum 1629 * calculated for us. 1630 * 1631 * We _don't_ set the validated flag, because we haven't 1632 * validated that the data and the checksum match. 1633 */ 1634 xnf_pseudo_cksum(txp->tx_bufp, length); 1635 txp->tx_txreq.flags |= NETTXF_csum_blank; 1636 1637 xnfp->xnf_stat_tx_cksum_deferred++; 1638 } 1639 1640 if (head == NULL) { 1641 ASSERT(tail == NULL); 1642 1643 head = txp; 1644 } else { 1645 ASSERT(tail != NULL); 1646 1647 tail->tx_next = txp; 1648 } 1649 tail = txp; 1650 1651 mp = mp->b_next; 1652 prepared++; 1653 1654 /* 1655 * There is no point in preparing more than 1656 * NET_TX_RING_SIZE, as we won't be able to push them 1657 * into the ring in one go and would hence have to 1658 * un-prepare the extra. 
1659 */ 1660 if (prepared == NET_TX_RING_SIZE) 1661 break; 1662 } 1663 1664 DTRACE_PROBE1(xnf_send_prepared, int, prepared); 1665 1666 if (mp != NULL) { 1667 #ifdef XNF_DEBUG 1668 int notprepared = 0; 1669 mblk_t *l = mp; 1670 1671 while (l != NULL) { 1672 notprepared++; 1673 l = l->b_next; 1674 } 1675 1676 DTRACE_PROBE1(xnf_send_notprepared, int, notprepared); 1677 #else /* !XNF_DEBUG */ 1678 DTRACE_PROBE1(xnf_send_notprepared, int, -1); 1679 #endif /* XNF_DEBUG */ 1680 } 1681 1682 /* 1683 * Push the packets we have prepared into the ring. They may 1684 * not all go. 1685 */ 1686 if (head != NULL) 1687 head = tx_push_packets(xnfp, head); 1688 1689 /* 1690 * If some packets that we prepared were not sent, unprepare 1691 * them and add them back to the head of those we didn't 1692 * prepare. 1693 */ 1694 { 1695 xnf_txbuf_t *loop; 1696 mblk_t *mp_head, *mp_tail; 1697 int unprepared = 0; 1698 1699 mp_head = mp_tail = NULL; 1700 loop = head; 1701 1702 while (loop != NULL) { 1703 xnf_txbuf_t *next = loop->tx_next; 1704 1705 if (loop->tx_bdesc == NULL) { 1706 (void) gnttab_end_foreign_access_ref( 1707 loop->tx_txreq.gref, 1); 1708 gref_put(xnfp, loop->tx_txreq.gref); 1709 (void) ddi_dma_unbind_handle( 1710 loop->tx_dma_handle); 1711 } else { 1712 xnf_buf_put(xnfp, loop->tx_bdesc, B_TRUE); 1713 } 1714 1715 ASSERT(loop->tx_mp != NULL); 1716 if (mp_head == NULL) 1717 mp_head = loop->tx_mp; 1718 mp_tail = loop->tx_mp; 1719 1720 kmem_cache_free(xnfp->xnf_tx_buf_cache, loop); 1721 loop = next; 1722 unprepared++; 1723 } 1724 1725 if (mp_tail == NULL) { 1726 ASSERT(mp_head == NULL); 1727 } else { 1728 ASSERT(mp_head != NULL); 1729 1730 mp_tail->b_next = mp; 1731 mp = mp_head; 1732 } 1733 1734 DTRACE_PROBE1(xnf_send_unprepared, int, unprepared); 1735 } 1736 1737 /* 1738 * If any mblks are left then we have deferred for some reason 1739 * and need to ask for a re-schedule later. This is typically 1740 * due to the ring filling. 1741 */ 1742 if (mp != NULL) { 1743 mutex_enter(&xnfp->xnf_schedlock); 1744 xnfp->xnf_need_sched = B_TRUE; 1745 mutex_exit(&xnfp->xnf_schedlock); 1746 1747 xnfp->xnf_stat_tx_defer++; 1748 } 1749 1750 return (mp); 1751 } 1752 1753 /* 1754 * Notification of RX packets. Currently no TX-complete interrupt is 1755 * used, as we clean the TX ring lazily. 1756 */ 1757 static uint_t 1758 xnf_intr(caddr_t arg) 1759 { 1760 xnf_t *xnfp = (xnf_t *)arg; 1761 mblk_t *mp; 1762 boolean_t need_sched, clean_ring; 1763 1764 mutex_enter(&xnfp->xnf_rxlock); 1765 1766 /* 1767 * Interrupts before we are connected are spurious. 1768 */ 1769 if (!xnfp->xnf_connected) { 1770 mutex_exit(&xnfp->xnf_rxlock); 1771 xnfp->xnf_stat_unclaimed_interrupts++; 1772 return (DDI_INTR_UNCLAIMED); 1773 } 1774 1775 /* 1776 * Receive side processing. 1777 */ 1778 do { 1779 /* 1780 * Collect buffers from the ring. 1781 */ 1782 xnf_rx_collect(xnfp); 1783 1784 /* 1785 * Interrupt me when the next receive buffer is consumed. 1786 */ 1787 xnfp->xnf_rx_ring.sring->rsp_event = 1788 xnfp->xnf_rx_ring.rsp_cons + 1; 1789 xen_mb(); 1790 1791 } while (RING_HAS_UNCONSUMED_RESPONSES(&xnfp->xnf_rx_ring)); 1792 1793 if (xnfp->xnf_rx_new_buffers_posted) { 1794 boolean_t notify; 1795 1796 /* 1797 * Indicate to the peer that we have re-filled the 1798 * receive ring, if it cares. 
1799 */ 1800 /* LINTED: constant in conditional context */ 1801 RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&xnfp->xnf_rx_ring, notify); 1802 if (notify) 1803 ec_notify_via_evtchn(xnfp->xnf_evtchn); 1804 xnfp->xnf_rx_new_buffers_posted = B_FALSE; 1805 } 1806 1807 mp = xnfp->xnf_rx_head; 1808 xnfp->xnf_rx_head = xnfp->xnf_rx_tail = NULL; 1809 1810 xnfp->xnf_stat_interrupts++; 1811 mutex_exit(&xnfp->xnf_rxlock); 1812 1813 if (mp != NULL) 1814 mac_rx(xnfp->xnf_mh, NULL, mp); 1815 1816 /* 1817 * Transmit side processing. 1818 * 1819 * If a previous transmit attempt failed or we have pending 1820 * multicast requests, clean the ring. 1821 * 1822 * If we previously stalled transmission and cleaning produces 1823 * some free slots, tell upstream to attempt sending again. 1824 * 1825 * The odd style is to avoid acquiring xnf_txlock unless we 1826 * will actually look inside the tx machinery. 1827 */ 1828 mutex_enter(&xnfp->xnf_schedlock); 1829 need_sched = xnfp->xnf_need_sched; 1830 clean_ring = need_sched || (xnfp->xnf_pending_multicast > 0); 1831 mutex_exit(&xnfp->xnf_schedlock); 1832 1833 if (clean_ring) { 1834 int free_slots; 1835 1836 mutex_enter(&xnfp->xnf_txlock); 1837 free_slots = tx_slots_get(xnfp, 0, B_FALSE); 1838 1839 if (need_sched && (free_slots > 0)) { 1840 mutex_enter(&xnfp->xnf_schedlock); 1841 xnfp->xnf_need_sched = B_FALSE; 1842 mutex_exit(&xnfp->xnf_schedlock); 1843 1844 mac_tx_update(xnfp->xnf_mh); 1845 } 1846 mutex_exit(&xnfp->xnf_txlock); 1847 } 1848 1849 return (DDI_INTR_CLAIMED); 1850 } 1851 1852 /* 1853 * xnf_start() -- start the board receiving and enable interrupts. 1854 */ 1855 static int 1856 xnf_start(void *arg) 1857 { 1858 xnf_t *xnfp = arg; 1859 1860 #ifdef XNF_DEBUG 1861 if (xnf_debug & XNF_DEBUG_TRACE) 1862 printf("xnf%d start(0x%p)\n", 1863 ddi_get_instance(xnfp->xnf_devinfo), (void *)xnfp); 1864 #endif 1865 1866 mutex_enter(&xnfp->xnf_rxlock); 1867 mutex_enter(&xnfp->xnf_txlock); 1868 1869 /* Accept packets from above. */ 1870 xnfp->xnf_running = B_TRUE; 1871 1872 mutex_exit(&xnfp->xnf_txlock); 1873 mutex_exit(&xnfp->xnf_rxlock); 1874 1875 return (0); 1876 } 1877 1878 /* xnf_stop() - disable hardware */ 1879 static void 1880 xnf_stop(void *arg) 1881 { 1882 xnf_t *xnfp = arg; 1883 1884 #ifdef XNF_DEBUG 1885 if (xnf_debug & XNF_DEBUG_TRACE) 1886 printf("xnf%d stop(0x%p)\n", 1887 ddi_get_instance(xnfp->xnf_devinfo), (void *)xnfp); 1888 #endif 1889 1890 mutex_enter(&xnfp->xnf_rxlock); 1891 mutex_enter(&xnfp->xnf_txlock); 1892 1893 xnfp->xnf_running = B_FALSE; 1894 1895 mutex_exit(&xnfp->xnf_txlock); 1896 mutex_exit(&xnfp->xnf_rxlock); 1897 } 1898 1899 /* 1900 * Hang buffer `bdesc' on the RX ring. 1901 */ 1902 static void 1903 xnf_rxbuf_hang(xnf_t *xnfp, xnf_buf_t *bdesc) 1904 { 1905 netif_rx_request_t *reqp; 1906 RING_IDX hang_ix; 1907 1908 ASSERT(MUTEX_HELD(&xnfp->xnf_rxlock)); 1909 1910 reqp = RING_GET_REQUEST(&xnfp->xnf_rx_ring, 1911 xnfp->xnf_rx_ring.req_prod_pvt); 1912 hang_ix = (RING_IDX) (reqp - RING_GET_REQUEST(&xnfp->xnf_rx_ring, 0)); 1913 ASSERT(xnfp->xnf_rx_pkt_info[hang_ix] == NULL); 1914 1915 reqp->id = bdesc->id = hang_ix; 1916 reqp->gref = bdesc->grant_ref; 1917 1918 xnfp->xnf_rx_pkt_info[hang_ix] = bdesc; 1919 xnfp->xnf_rx_ring.req_prod_pvt++; 1920 1921 xnfp->xnf_rx_new_buffers_posted = B_TRUE; 1922 } 1923 1924 /* 1925 * Collect packets from the RX ring, storing them in `xnfp' for later 1926 * use. 
1927 */ 1928 static void 1929 xnf_rx_collect(xnf_t *xnfp) 1930 { 1931 mblk_t *head, *tail; 1932 1933 ASSERT(MUTEX_HELD(&xnfp->xnf_rxlock)); 1934 1935 /* 1936 * Loop over unconsumed responses: 1937 * 1. get a response 1938 * 2. take corresponding buffer off recv. ring 1939 * 3. indicate this by setting slot to NULL 1940 * 4. create a new message and 1941 * 5. copy data in, adjust ptr 1942 */ 1943 1944 head = tail = NULL; 1945 1946 while (RING_HAS_UNCONSUMED_RESPONSES(&xnfp->xnf_rx_ring)) { 1947 netif_rx_response_t *rxpkt; 1948 xnf_buf_t *bdesc; 1949 ssize_t len; 1950 size_t off; 1951 mblk_t *mp = NULL; 1952 boolean_t hwcsum = B_FALSE; 1953 grant_ref_t ref; 1954 1955 /* 1. */ 1956 rxpkt = RING_GET_RESPONSE(&xnfp->xnf_rx_ring, 1957 xnfp->xnf_rx_ring.rsp_cons); 1958 1959 DTRACE_PROBE4(xnf_rx_got_rsp, int, (int)rxpkt->id, 1960 int, (int)rxpkt->offset, 1961 int, (int)rxpkt->flags, 1962 int, (int)rxpkt->status); 1963 1964 /* 1965 * 2. 1966 */ 1967 bdesc = xnfp->xnf_rx_pkt_info[rxpkt->id]; 1968 1969 /* 1970 * 3. 1971 */ 1972 xnfp->xnf_rx_pkt_info[rxpkt->id] = NULL; 1973 ASSERT(bdesc->id == rxpkt->id); 1974 1975 ref = bdesc->grant_ref; 1976 off = rxpkt->offset; 1977 len = rxpkt->status; 1978 1979 if (!xnfp->xnf_running) { 1980 DTRACE_PROBE4(xnf_rx_not_running, 1981 int, rxpkt->status, 1982 char *, bdesc->buf, int, rxpkt->offset, 1983 char *, ((char *)bdesc->buf) + rxpkt->offset); 1984 1985 xnfp->xnf_stat_drop++; 1986 1987 } else if (len <= 0) { 1988 DTRACE_PROBE4(xnf_rx_pkt_status_negative, 1989 int, rxpkt->status, 1990 char *, bdesc->buf, int, rxpkt->offset, 1991 char *, ((char *)bdesc->buf) + rxpkt->offset); 1992 1993 xnfp->xnf_stat_errrx++; 1994 1995 switch (len) { 1996 case 0: 1997 xnfp->xnf_stat_runt++; 1998 break; 1999 case NETIF_RSP_ERROR: 2000 xnfp->xnf_stat_mac_rcv_error++; 2001 break; 2002 case NETIF_RSP_DROPPED: 2003 xnfp->xnf_stat_norxbuf++; 2004 break; 2005 } 2006 2007 } else if (bdesc->grant_ref == INVALID_GRANT_REF) { 2008 cmn_err(CE_WARN, "Bad rx grant reference %d " 2009 "from domain %d", ref, 2010 xvdi_get_oeid(xnfp->xnf_devinfo)); 2011 2012 } else if ((off + len) > PAGESIZE) { 2013 cmn_err(CE_WARN, "Rx packet overflows page " 2014 "(offset %ld, length %ld) from domain %d", 2015 off, len, xvdi_get_oeid(xnfp->xnf_devinfo)); 2016 } else { 2017 xnf_buf_t *nbuf = NULL; 2018 2019 DTRACE_PROBE4(xnf_rx_packet, int, len, 2020 char *, bdesc->buf, int, off, 2021 char *, ((char *)bdesc->buf) + off); 2022 2023 ASSERT(off + len <= PAGEOFFSET); 2024 2025 if (rxpkt->flags & NETRXF_data_validated) 2026 hwcsum = B_TRUE; 2027 2028 /* 2029 * If the packet is below a pre-determined 2030 * size we will copy data out rather than 2031 * replace it. 2032 */ 2033 if (len > xnf_rx_copy_limit) 2034 nbuf = xnf_buf_get(xnfp, KM_NOSLEEP, B_FALSE); 2035 2036 /* 2037 * If we have a replacement buffer, attempt to 2038 * wrap the existing one with an mblk_t in 2039 * order that the upper layers of the stack 2040 * might use it directly. 2041 */ 2042 if (nbuf != NULL) { 2043 mp = desballoc((unsigned char *)bdesc->buf, 2044 bdesc->len, 0, &bdesc->free_rtn); 2045 if (mp == NULL) { 2046 xnfp->xnf_stat_rx_desballoc_fail++; 2047 xnfp->xnf_stat_norxbuf++; 2048 2049 xnf_buf_put(xnfp, nbuf, B_FALSE); 2050 nbuf = NULL; 2051 } else { 2052 mp->b_rptr = mp->b_rptr + off; 2053 mp->b_wptr = mp->b_rptr + len; 2054 2055 /* 2056 * Release the grant reference 2057 * associated with this buffer 2058 * - they are scarce and the 2059 * upper layers of the stack 2060 * don't need it. 
2061 */ 2062 (void) gnttab_end_foreign_access_ref( 2063 bdesc->grant_ref, 0); 2064 gref_put(xnfp, bdesc->grant_ref); 2065 bdesc->grant_ref = INVALID_GRANT_REF; 2066 2067 bdesc = nbuf; 2068 } 2069 } 2070 2071 if (nbuf == NULL) { 2072 /* 2073 * No replacement buffer allocated - 2074 * attempt to copy the data out and 2075 * re-hang the existing buffer. 2076 */ 2077 2078 /* 4. */ 2079 mp = allocb(len, BPRI_MED); 2080 if (mp == NULL) { 2081 xnfp->xnf_stat_rx_allocb_fail++; 2082 xnfp->xnf_stat_norxbuf++; 2083 } else { 2084 /* 5. */ 2085 bcopy(bdesc->buf + off, mp->b_wptr, 2086 len); 2087 mp->b_wptr += len; 2088 } 2089 } 2090 } 2091 2092 /* Re-hang the buffer. */ 2093 xnf_rxbuf_hang(xnfp, bdesc); 2094 2095 if (mp != NULL) { 2096 if (hwcsum) { 2097 /* 2098 * If the peer says that the data has 2099 * been validated then we declare that 2100 * the full checksum has been 2101 * verified. 2102 * 2103 * We don't look at the "checksum 2104 * blank" flag, and hence could have a 2105 * packet here that we are asserting 2106 * is good with a blank checksum. 2107 * 2108 * The hardware checksum offload 2109 * specification says that we must 2110 * provide the actual checksum as well 2111 * as an assertion that it is valid, 2112 * but the protocol stack doesn't 2113 * actually use it and some other 2114 * drivers don't bother, so we don't. 2115 * If it was necessary we could grovel 2116 * in the packet to find it. 2117 */ 2118 (void) hcksum_assoc(mp, NULL, 2119 NULL, 0, 0, 0, 0, 2120 HCK_FULLCKSUM | 2121 HCK_FULLCKSUM_OK, 0); 2122 xnfp->xnf_stat_rx_cksum_no_need++; 2123 } 2124 if (head == NULL) { 2125 ASSERT(tail == NULL); 2126 2127 head = mp; 2128 } else { 2129 ASSERT(tail != NULL); 2130 2131 tail->b_next = mp; 2132 } 2133 tail = mp; 2134 2135 ASSERT(mp->b_next == NULL); 2136 2137 xnfp->xnf_stat_ipackets++; 2138 xnfp->xnf_stat_rbytes += len; 2139 } 2140 2141 xnfp->xnf_rx_ring.rsp_cons++; 2142 } 2143 2144 /* 2145 * Store the mblks we have collected. 2146 */ 2147 if (head != NULL) { 2148 ASSERT(tail != NULL); 2149 2150 if (xnfp->xnf_rx_head == NULL) { 2151 ASSERT(xnfp->xnf_rx_tail == NULL); 2152 2153 xnfp->xnf_rx_head = head; 2154 } else { 2155 ASSERT(xnfp->xnf_rx_tail != NULL); 2156 2157 xnfp->xnf_rx_tail->b_next = head; 2158 } 2159 xnfp->xnf_rx_tail = tail; 2160 } 2161 } 2162 2163 /* 2164 * xnf_alloc_dma_resources() -- initialize the drivers structures 2165 */ 2166 static int 2167 xnf_alloc_dma_resources(xnf_t *xnfp) 2168 { 2169 dev_info_t *devinfo = xnfp->xnf_devinfo; 2170 size_t len; 2171 ddi_dma_cookie_t dma_cookie; 2172 uint_t ncookies; 2173 int rc; 2174 caddr_t rptr; 2175 2176 /* 2177 * The code below allocates all the DMA data structures that 2178 * need to be released when the driver is detached. 2179 * 2180 * Allocate page for the transmit descriptor ring. 
2181 */ 2182 if (ddi_dma_alloc_handle(devinfo, &ringbuf_dma_attr, 2183 DDI_DMA_SLEEP, 0, &xnfp->xnf_tx_ring_dma_handle) != DDI_SUCCESS) 2184 goto alloc_error; 2185 2186 if (ddi_dma_mem_alloc(xnfp->xnf_tx_ring_dma_handle, 2187 PAGESIZE, &accattr, DDI_DMA_CONSISTENT, 2188 DDI_DMA_SLEEP, 0, &rptr, &len, 2189 &xnfp->xnf_tx_ring_dma_acchandle) != DDI_SUCCESS) { 2190 ddi_dma_free_handle(&xnfp->xnf_tx_ring_dma_handle); 2191 xnfp->xnf_tx_ring_dma_handle = NULL; 2192 goto alloc_error; 2193 } 2194 2195 if ((rc = ddi_dma_addr_bind_handle(xnfp->xnf_tx_ring_dma_handle, NULL, 2196 rptr, PAGESIZE, DDI_DMA_RDWR | DDI_DMA_CONSISTENT, 2197 DDI_DMA_SLEEP, 0, &dma_cookie, &ncookies)) != DDI_DMA_MAPPED) { 2198 ddi_dma_mem_free(&xnfp->xnf_tx_ring_dma_acchandle); 2199 ddi_dma_free_handle(&xnfp->xnf_tx_ring_dma_handle); 2200 xnfp->xnf_tx_ring_dma_handle = NULL; 2201 xnfp->xnf_tx_ring_dma_acchandle = NULL; 2202 if (rc == DDI_DMA_NORESOURCES) 2203 goto alloc_error; 2204 else 2205 goto error; 2206 } 2207 2208 ASSERT(ncookies == 1); 2209 bzero(rptr, PAGESIZE); 2210 /* LINTED: constant in conditional context */ 2211 SHARED_RING_INIT((netif_tx_sring_t *)rptr); 2212 /* LINTED: constant in conditional context */ 2213 FRONT_RING_INIT(&xnfp->xnf_tx_ring, (netif_tx_sring_t *)rptr, PAGESIZE); 2214 xnfp->xnf_tx_ring_phys_addr = dma_cookie.dmac_laddress; 2215 2216 /* 2217 * Allocate page for the receive descriptor ring. 2218 */ 2219 if (ddi_dma_alloc_handle(devinfo, &ringbuf_dma_attr, 2220 DDI_DMA_SLEEP, 0, &xnfp->xnf_rx_ring_dma_handle) != DDI_SUCCESS) 2221 goto alloc_error; 2222 2223 if (ddi_dma_mem_alloc(xnfp->xnf_rx_ring_dma_handle, 2224 PAGESIZE, &accattr, DDI_DMA_CONSISTENT, 2225 DDI_DMA_SLEEP, 0, &rptr, &len, 2226 &xnfp->xnf_rx_ring_dma_acchandle) != DDI_SUCCESS) { 2227 ddi_dma_free_handle(&xnfp->xnf_rx_ring_dma_handle); 2228 xnfp->xnf_rx_ring_dma_handle = NULL; 2229 goto alloc_error; 2230 } 2231 2232 if ((rc = ddi_dma_addr_bind_handle(xnfp->xnf_rx_ring_dma_handle, NULL, 2233 rptr, PAGESIZE, DDI_DMA_RDWR | DDI_DMA_CONSISTENT, 2234 DDI_DMA_SLEEP, 0, &dma_cookie, &ncookies)) != DDI_DMA_MAPPED) { 2235 ddi_dma_mem_free(&xnfp->xnf_rx_ring_dma_acchandle); 2236 ddi_dma_free_handle(&xnfp->xnf_rx_ring_dma_handle); 2237 xnfp->xnf_rx_ring_dma_handle = NULL; 2238 xnfp->xnf_rx_ring_dma_acchandle = NULL; 2239 if (rc == DDI_DMA_NORESOURCES) 2240 goto alloc_error; 2241 else 2242 goto error; 2243 } 2244 2245 ASSERT(ncookies == 1); 2246 bzero(rptr, PAGESIZE); 2247 /* LINTED: constant in conditional context */ 2248 SHARED_RING_INIT((netif_rx_sring_t *)rptr); 2249 /* LINTED: constant in conditional context */ 2250 FRONT_RING_INIT(&xnfp->xnf_rx_ring, (netif_rx_sring_t *)rptr, PAGESIZE); 2251 xnfp->xnf_rx_ring_phys_addr = dma_cookie.dmac_laddress; 2252 2253 return (DDI_SUCCESS); 2254 2255 alloc_error: 2256 cmn_err(CE_WARN, "xnf%d: could not allocate enough DMA memory", 2257 ddi_get_instance(xnfp->xnf_devinfo)); 2258 error: 2259 xnf_release_dma_resources(xnfp); 2260 return (DDI_FAILURE); 2261 } 2262 2263 /* 2264 * Release all DMA resources in the opposite order from acquisition 2265 */ 2266 static void 2267 xnf_release_dma_resources(xnf_t *xnfp) 2268 { 2269 int i; 2270 2271 /* 2272 * Free receive buffers which are currently associated with 2273 * descriptors. 
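 * Teardown mirrors xnf_alloc_dma_resources(): any buffers still
 * hung on the receive ring are returned to the cache first (under
 * xnf_rxlock), then each ring's DMA resources are released in the
 * reverse order of their allocation - unbind the handle, free the
 * memory, then free the handle itself.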
2274 */ 2275 mutex_enter(&xnfp->xnf_rxlock); 2276 for (i = 0; i < NET_RX_RING_SIZE; i++) { 2277 xnf_buf_t *bp; 2278 2279 if ((bp = xnfp->xnf_rx_pkt_info[i]) == NULL) 2280 continue; 2281 xnfp->xnf_rx_pkt_info[i] = NULL; 2282 xnf_buf_put(xnfp, bp, B_FALSE); 2283 } 2284 mutex_exit(&xnfp->xnf_rxlock); 2285 2286 /* Free the receive ring buffer. */ 2287 if (xnfp->xnf_rx_ring_dma_acchandle != NULL) { 2288 (void) ddi_dma_unbind_handle(xnfp->xnf_rx_ring_dma_handle); 2289 ddi_dma_mem_free(&xnfp->xnf_rx_ring_dma_acchandle); 2290 ddi_dma_free_handle(&xnfp->xnf_rx_ring_dma_handle); 2291 xnfp->xnf_rx_ring_dma_acchandle = NULL; 2292 } 2293 /* Free the transmit ring buffer. */ 2294 if (xnfp->xnf_tx_ring_dma_acchandle != NULL) { 2295 (void) ddi_dma_unbind_handle(xnfp->xnf_tx_ring_dma_handle); 2296 ddi_dma_mem_free(&xnfp->xnf_tx_ring_dma_acchandle); 2297 ddi_dma_free_handle(&xnfp->xnf_tx_ring_dma_handle); 2298 xnfp->xnf_tx_ring_dma_acchandle = NULL; 2299 } 2300 2301 } 2302 2303 /* 2304 * Release any packets and associated structures used by the TX ring. 2305 */ 2306 static void 2307 xnf_release_mblks(xnf_t *xnfp) 2308 { 2309 RING_IDX i; 2310 xnf_txid_t *tidp; 2311 2312 for (i = 0, tidp = &xnfp->xnf_tx_pkt_id[0]; 2313 i < NET_TX_RING_SIZE; 2314 i++, tidp++) { 2315 xnf_txbuf_t *txp = tidp->txbuf; 2316 2317 if (txp != NULL) { 2318 ASSERT(txp->tx_mp != NULL); 2319 freemsg(txp->tx_mp); 2320 2321 txid_put(xnfp, tidp); 2322 kmem_cache_free(xnfp->xnf_tx_buf_cache, txp); 2323 } 2324 } 2325 } 2326 2327 static int 2328 xnf_buf_constructor(void *buf, void *arg, int kmflag) 2329 { 2330 int (*ddiflags)(caddr_t) = DDI_DMA_SLEEP; 2331 xnf_buf_t *bdesc = buf; 2332 xnf_t *xnfp = arg; 2333 ddi_dma_cookie_t dma_cookie; 2334 uint_t ncookies; 2335 size_t len; 2336 2337 if (kmflag & KM_NOSLEEP) 2338 ddiflags = DDI_DMA_DONTWAIT; 2339 2340 /* Allocate a DMA access handle for the buffer. */ 2341 if (ddi_dma_alloc_handle(xnfp->xnf_devinfo, &buf_dma_attr, 2342 ddiflags, 0, &bdesc->dma_handle) != DDI_SUCCESS) 2343 goto failure; 2344 2345 /* Allocate DMA-able memory for buffer. */ 2346 if (ddi_dma_mem_alloc(bdesc->dma_handle, 2347 PAGESIZE, &data_accattr, DDI_DMA_STREAMING, ddiflags, 0, 2348 &bdesc->buf, &len, &bdesc->acc_handle) != DDI_SUCCESS) 2349 goto failure_1; 2350 2351 /* Bind to virtual address of buffer to get physical address. */ 2352 if (ddi_dma_addr_bind_handle(bdesc->dma_handle, NULL, 2353 bdesc->buf, len, DDI_DMA_RDWR | DDI_DMA_STREAMING, 2354 ddiflags, 0, &dma_cookie, &ncookies) != DDI_DMA_MAPPED) 2355 goto failure_2; 2356 ASSERT(ncookies == 1); 2357 2358 bdesc->free_rtn.free_func = xnf_buf_recycle; 2359 bdesc->free_rtn.free_arg = (caddr_t)bdesc; 2360 bdesc->xnfp = xnfp; 2361 bdesc->buf_phys = dma_cookie.dmac_laddress; 2362 bdesc->buf_mfn = pfn_to_mfn(xnf_btop(bdesc->buf_phys)); 2363 bdesc->len = dma_cookie.dmac_size; 2364 bdesc->grant_ref = INVALID_GRANT_REF; 2365 bdesc->gen = xnfp->xnf_gen; 2366 2367 atomic_add_64(&xnfp->xnf_stat_buf_allocated, 1); 2368 2369 return (0); 2370 2371 failure_2: 2372 ddi_dma_mem_free(&bdesc->acc_handle); 2373 2374 failure_1: 2375 ddi_dma_free_handle(&bdesc->dma_handle); 2376 2377 failure: 2378 2379 ASSERT(kmflag & KM_NOSLEEP); /* Cannot fail for KM_SLEEP. 
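 * With KM_SLEEP the three DDI calls above are passed DDI_DMA_SLEEP
 * callbacks and will wait for resources rather than fail, so this
 * failure path is only reachable for KM_NOSLEEP allocations.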
*/ 2380 return (-1); 2381 } 2382 2383 static void 2384 xnf_buf_destructor(void *buf, void *arg) 2385 { 2386 xnf_buf_t *bdesc = buf; 2387 xnf_t *xnfp = arg; 2388 2389 (void) ddi_dma_unbind_handle(bdesc->dma_handle); 2390 ddi_dma_mem_free(&bdesc->acc_handle); 2391 ddi_dma_free_handle(&bdesc->dma_handle); 2392 2393 atomic_add_64(&xnfp->xnf_stat_buf_allocated, -1); 2394 } 2395 2396 static xnf_buf_t * 2397 xnf_buf_get(xnf_t *xnfp, int flags, boolean_t readonly) 2398 { 2399 grant_ref_t gref; 2400 xnf_buf_t *bufp; 2401 2402 /* 2403 * Usually grant references are more scarce than memory, so we 2404 * attempt to acquire a grant reference first. 2405 */ 2406 gref = gref_get(xnfp); 2407 if (gref == INVALID_GRANT_REF) 2408 return (NULL); 2409 2410 bufp = kmem_cache_alloc(xnfp->xnf_buf_cache, flags); 2411 if (bufp == NULL) { 2412 gref_put(xnfp, gref); 2413 return (NULL); 2414 } 2415 2416 ASSERT(bufp->grant_ref == INVALID_GRANT_REF); 2417 2418 bufp->grant_ref = gref; 2419 2420 if (bufp->gen != xnfp->xnf_gen) 2421 xnf_buf_refresh(bufp); 2422 2423 gnttab_grant_foreign_access_ref(bufp->grant_ref, 2424 xvdi_get_oeid(bufp->xnfp->xnf_devinfo), 2425 bufp->buf_mfn, readonly ? 1 : 0); 2426 2427 atomic_add_64(&xnfp->xnf_stat_buf_outstanding, 1); 2428 2429 return (bufp); 2430 } 2431 2432 static void 2433 xnf_buf_put(xnf_t *xnfp, xnf_buf_t *bufp, boolean_t readonly) 2434 { 2435 if (bufp->grant_ref != INVALID_GRANT_REF) { 2436 (void) gnttab_end_foreign_access_ref( 2437 bufp->grant_ref, readonly ? 1 : 0); 2438 gref_put(xnfp, bufp->grant_ref); 2439 bufp->grant_ref = INVALID_GRANT_REF; 2440 } 2441 2442 kmem_cache_free(xnfp->xnf_buf_cache, bufp); 2443 2444 atomic_add_64(&xnfp->xnf_stat_buf_outstanding, -1); 2445 } 2446 2447 /* 2448 * Refresh any cached data about a buffer after resume. 2449 */ 2450 static void 2451 xnf_buf_refresh(xnf_buf_t *bdesc) 2452 { 2453 bdesc->buf_mfn = pfn_to_mfn(xnf_btop(bdesc->buf_phys)); 2454 bdesc->gen = bdesc->xnfp->xnf_gen; 2455 } 2456 2457 /* 2458 * Streams `freeb' routine for `xnf_buf_t' when used as transmit 2459 * look-aside buffers. 2460 */ 2461 static void 2462 xnf_buf_recycle(xnf_buf_t *bdesc) 2463 { 2464 xnf_t *xnfp = bdesc->xnfp; 2465 2466 xnf_buf_put(xnfp, bdesc, B_TRUE); 2467 } 2468 2469 static int 2470 xnf_tx_buf_constructor(void *buf, void *arg, int kmflag) 2471 { 2472 int (*ddiflags)(caddr_t) = DDI_DMA_SLEEP; 2473 xnf_txbuf_t *txp = buf; 2474 xnf_t *xnfp = arg; 2475 2476 if (kmflag & KM_NOSLEEP) 2477 ddiflags = DDI_DMA_DONTWAIT; 2478 2479 if (ddi_dma_alloc_handle(xnfp->xnf_devinfo, &buf_dma_attr, 2480 ddiflags, 0, &txp->tx_dma_handle) != DDI_SUCCESS) { 2481 ASSERT(kmflag & KM_NOSLEEP); /* Cannot fail for KM_SLEEP. */ 2482 return (-1); 2483 } 2484 2485 return (0); 2486 } 2487 2488 static void 2489 xnf_tx_buf_destructor(void *buf, void *arg) 2490 { 2491 _NOTE(ARGUNUSED(arg)); 2492 xnf_txbuf_t *txp = buf; 2493 2494 ddi_dma_free_handle(&txp->tx_dma_handle); 2495 } 2496 2497 /* 2498 * Statistics. 
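 * The counters named below are exported as the "aux_statistics"
 * named kstat (class "net") for this instance, in addition to the
 * standard MAC and Ethernet statistics reported via xnf_stat().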
2499 */ 2500 static char *xnf_aux_statistics[] = { 2501 "tx_cksum_deferred", 2502 "rx_cksum_no_need", 2503 "interrupts", 2504 "unclaimed_interrupts", 2505 "tx_pullup", 2506 "tx_pagebndry", 2507 "tx_attempt", 2508 "buf_allocated", 2509 "buf_outstanding", 2510 "gref_outstanding", 2511 "gref_failure", 2512 "gref_peak", 2513 "rx_allocb_fail", 2514 "rx_desballoc_fail", 2515 }; 2516 2517 static int 2518 xnf_kstat_aux_update(kstat_t *ksp, int flag) 2519 { 2520 xnf_t *xnfp; 2521 kstat_named_t *knp; 2522 2523 if (flag != KSTAT_READ) 2524 return (EACCES); 2525 2526 xnfp = ksp->ks_private; 2527 knp = ksp->ks_data; 2528 2529 /* 2530 * Assignment order must match that of the names in 2531 * xnf_aux_statistics. 2532 */ 2533 (knp++)->value.ui64 = xnfp->xnf_stat_tx_cksum_deferred; 2534 (knp++)->value.ui64 = xnfp->xnf_stat_rx_cksum_no_need; 2535 2536 (knp++)->value.ui64 = xnfp->xnf_stat_interrupts; 2537 (knp++)->value.ui64 = xnfp->xnf_stat_unclaimed_interrupts; 2538 (knp++)->value.ui64 = xnfp->xnf_stat_tx_pullup; 2539 (knp++)->value.ui64 = xnfp->xnf_stat_tx_pagebndry; 2540 (knp++)->value.ui64 = xnfp->xnf_stat_tx_attempt; 2541 2542 (knp++)->value.ui64 = xnfp->xnf_stat_buf_allocated; 2543 (knp++)->value.ui64 = xnfp->xnf_stat_buf_outstanding; 2544 (knp++)->value.ui64 = xnfp->xnf_stat_gref_outstanding; 2545 (knp++)->value.ui64 = xnfp->xnf_stat_gref_failure; 2546 (knp++)->value.ui64 = xnfp->xnf_stat_gref_peak; 2547 (knp++)->value.ui64 = xnfp->xnf_stat_rx_allocb_fail; 2548 (knp++)->value.ui64 = xnfp->xnf_stat_rx_desballoc_fail; 2549 2550 return (0); 2551 } 2552 2553 static boolean_t 2554 xnf_kstat_init(xnf_t *xnfp) 2555 { 2556 int nstat = sizeof (xnf_aux_statistics) / 2557 sizeof (xnf_aux_statistics[0]); 2558 char **cp = xnf_aux_statistics; 2559 kstat_named_t *knp; 2560 2561 /* 2562 * Create and initialise kstats. 
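 * As a purely illustrative sketch (not part of this driver), the
 * counters created here can be read from user-land with kstat(1M),
 * e.g. "kstat -m xnf -n aux_statistics", or via libkstat roughly as
 * follows; the instance number (0) and the chosen statistic are
 * arbitrary examples:
 *
 *	#include <kstat.h>
 *	#include <stdio.h>
 *
 *	int
 *	main(void)
 *	{
 *		kstat_ctl_t *kc = kstat_open();
 *		kstat_t *ksp;
 *		kstat_named_t *kn;
 *
 *		if (kc == NULL)
 *			return (1);
 *		ksp = kstat_lookup(kc, "xnf", 0, "aux_statistics");
 *		if (ksp == NULL || kstat_read(kc, ksp, NULL) == -1)
 *			return (1);
 *		kn = kstat_data_lookup(ksp, "rx_desballoc_fail");
 *		if (kn != NULL)
 *			(void) printf("rx_desballoc_fail = %llu\n",
 *			    (u_longlong_t)kn->value.ui64);
 *		(void) kstat_close(kc);
 *		return (0);
 *	}
 *
 * (Link with -lkstat.)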
2563 */ 2564 if ((xnfp->xnf_kstat_aux = kstat_create("xnf", 2565 ddi_get_instance(xnfp->xnf_devinfo), 2566 "aux_statistics", "net", KSTAT_TYPE_NAMED, 2567 nstat, 0)) == NULL) 2568 return (B_FALSE); 2569 2570 xnfp->xnf_kstat_aux->ks_private = xnfp; 2571 xnfp->xnf_kstat_aux->ks_update = xnf_kstat_aux_update; 2572 2573 knp = xnfp->xnf_kstat_aux->ks_data; 2574 while (nstat > 0) { 2575 kstat_named_init(knp, *cp, KSTAT_DATA_UINT64); 2576 2577 knp++; 2578 cp++; 2579 nstat--; 2580 } 2581 2582 kstat_install(xnfp->xnf_kstat_aux); 2583 2584 return (B_TRUE); 2585 } 2586 2587 static int 2588 xnf_stat(void *arg, uint_t stat, uint64_t *val) 2589 { 2590 xnf_t *xnfp = arg; 2591 2592 mutex_enter(&xnfp->xnf_rxlock); 2593 mutex_enter(&xnfp->xnf_txlock); 2594 2595 #define mac_stat(q, r) \ 2596 case (MAC_STAT_##q): \ 2597 *val = xnfp->xnf_stat_##r; \ 2598 break 2599 2600 #define ether_stat(q, r) \ 2601 case (ETHER_STAT_##q): \ 2602 *val = xnfp->xnf_stat_##r; \ 2603 break 2604 2605 switch (stat) { 2606 2607 mac_stat(IPACKETS, ipackets); 2608 mac_stat(OPACKETS, opackets); 2609 mac_stat(RBYTES, rbytes); 2610 mac_stat(OBYTES, obytes); 2611 mac_stat(NORCVBUF, norxbuf); 2612 mac_stat(IERRORS, errrx); 2613 mac_stat(NOXMTBUF, tx_defer); 2614 2615 ether_stat(MACRCV_ERRORS, mac_rcv_error); 2616 ether_stat(TOOSHORT_ERRORS, runt); 2617 2618 /* always claim to be in full duplex mode */ 2619 case ETHER_STAT_LINK_DUPLEX: 2620 *val = LINK_DUPLEX_FULL; 2621 break; 2622 2623 /* always claim to be at 1Gb/s link speed */ 2624 case MAC_STAT_IFSPEED: 2625 *val = 1000000000ull; 2626 break; 2627 2628 default: 2629 mutex_exit(&xnfp->xnf_txlock); 2630 mutex_exit(&xnfp->xnf_rxlock); 2631 2632 return (ENOTSUP); 2633 } 2634 2635 #undef mac_stat 2636 #undef ether_stat 2637 2638 mutex_exit(&xnfp->xnf_txlock); 2639 mutex_exit(&xnfp->xnf_rxlock); 2640 2641 return (0); 2642 } 2643 2644 static boolean_t 2645 xnf_getcapab(void *arg, mac_capab_t cap, void *cap_data) 2646 { 2647 _NOTE(ARGUNUSED(arg)); 2648 2649 switch (cap) { 2650 case MAC_CAPAB_HCKSUM: { 2651 uint32_t *capab = cap_data; 2652 2653 /* 2654 * Whilst the flag used to communicate with the IO 2655 * domain is called "NETTXF_csum_blank", the checksum 2656 * in the packet must contain the pseudo-header 2657 * checksum and not zero. 2658 * 2659 * To help out the IO domain, we might use 2660 * HCKSUM_INET_PARTIAL. Unfortunately our stack will 2661 * then use checksum offload for IPv6 packets, which 2662 * the IO domain can't handle. 2663 * 2664 * As a result, we declare ourselves capable of 2665 * HCKSUM_INET_FULL_V4. This means that we receive 2666 * IPv4 packets from the stack with a blank checksum 2667 * field and must insert the pseudo-header checksum 2668 * before passing the packet to the IO domain. (See the illustrative sketch following oe_state_change() below.) 2669 */ 2670 *capab = HCKSUM_INET_FULL_V4; 2671 break; 2672 } 2673 default: 2674 return (B_FALSE); 2675 } 2676 2677 return (B_TRUE); 2678 } 2679 2680 /* 2681 * The state of the peer has changed - react accordingly.
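 * Only two states require action here: XenbusStateInitWait, which
 * drives configuration and connection to the backend, and
 * XenbusStateConnected, which marks the instance connected, wakes
 * any waiting transmitters, reports the link as up and refreshes
 * the multicast filter. All other states are ignored.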
2682 */ 2683 static void 2684 oe_state_change(dev_info_t *dip, ddi_eventcookie_t id, 2685 void *arg, void *impl_data) 2686 { 2687 _NOTE(ARGUNUSED(id, arg)); 2688 xnf_t *xnfp = ddi_get_driver_private(dip); 2689 XenbusState new_state = *(XenbusState *)impl_data; 2690 2691 ASSERT(xnfp != NULL); 2692 2693 switch (new_state) { 2694 case XenbusStateUnknown: 2695 case XenbusStateInitialising: 2696 case XenbusStateInitialised: 2697 case XenbusStateClosing: 2698 case XenbusStateClosed: 2699 case XenbusStateReconfiguring: 2700 case XenbusStateReconfigured: 2701 break; 2702 2703 case XenbusStateInitWait: 2704 xnf_read_config(xnfp); 2705 2706 if (!xnfp->xnf_be_rx_copy) { 2707 cmn_err(CE_WARN, 2708 "The xnf driver requires a dom0 that " 2709 "supports 'feature-rx-copy'."); 2710 (void) xvdi_switch_state(xnfp->xnf_devinfo, 2711 XBT_NULL, XenbusStateClosed); 2712 break; 2713 } 2714 2715 /* 2716 * Connect to the backend. 2717 */ 2718 xnf_be_connect(xnfp); 2719 2720 /* 2721 * Tell the MAC layer about our MAC address as discovered 2722 * by xnf_read_config(). 2723 */ 2724 mac_unicst_update(xnfp->xnf_mh, xnfp->xnf_mac_addr); 2725 2726 break; 2727 case XenbusStateConnected: 2728 mutex_enter(&xnfp->xnf_rxlock); 2729 mutex_enter(&xnfp->xnf_txlock); 2730 2731 xnfp->xnf_connected = B_TRUE; 2732 /* 2733 * Wake up any threads waiting to send data to 2734 * the backend. 2735 */ 2736 cv_broadcast(&xnfp->xnf_cv_state); 2737 2738 mutex_exit(&xnfp->xnf_txlock); 2739 mutex_exit(&xnfp->xnf_rxlock); 2740 2741 /* 2742 * Kick the peer in case it missed any transmit 2743 * requests in the TX ring. 2744 */ 2745 ec_notify_via_evtchn(xnfp->xnf_evtchn); 2746 2747 /* 2748 * There may already be completed receive requests in 2749 * the ring sent by the backend after it gets connected 2750 * but before we see its state change here, so we call 2751 * xnf_intr() to handle them, if any. 2752 */ 2753 (void) xnf_intr((caddr_t)xnfp); 2754 2755 /* 2756 * Mark the link up now that we are connected. 2757 */ 2758 mac_link_update(xnfp->xnf_mh, LINK_STATE_UP); 2759 2760 /* 2761 * Tell the backend about the multicast addresses in 2762 * which we are interested. 2763 */ 2764 mac_multicast_refresh(xnfp->xnf_mh, NULL, xnfp, B_TRUE); 2765 2766 break; 2767 2768 default: 2769 break; 2770 } 2771 } 2772
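/*
 * The HCKSUM comment in xnf_getcapab() above notes that outbound
 * IPv4 packets arrive from the stack with a blank (zero) checksum
 * field which must be replaced with the pseudo-header checksum
 * before the packet is handed to the IO domain. As a minimal,
 * purely illustrative sketch (this is not the routine the driver
 * uses, and the name pseudo_hdr_cksum() is made up for the
 * example), the value that belongs in that field could be computed
 * from host-byte-order addresses, protocol and L4 length like so:
 *
 *	static uint16_t
 *	pseudo_hdr_cksum(uint32_t src, uint32_t dst, uint8_t proto,
 *	    uint16_t l4len)
 *	{
 *		uint32_t sum;
 *
 *		sum = (src >> 16) + (src & 0xffff) +
 *		    (dst >> 16) + (dst & 0xffff) +
 *		    proto + l4len;
 *
 *		while ((sum >> 16) != 0)
 *			sum = (sum & 0xffff) + (sum >> 16);
 *
 *		return (htons((uint16_t)sum));
 *	}
 *
 * Note that the sum is not complemented: the final inversion is
 * left to whoever completes the checksum over the L4 header and
 * payload.
 */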