1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* 28 * 29 * Copyright (c) 2004 Christian Limpach. 30 * All rights reserved. 31 * 32 * Redistribution and use in source and binary forms, with or without 33 * modification, are permitted provided that the following conditions 34 * are met: 35 * 1. Redistributions of source code must retain the above copyright 36 * notice, this list of conditions and the following disclaimer. 37 * 2. Redistributions in binary form must reproduce the above copyright 38 * notice, this list of conditions and the following disclaimer in the 39 * documentation and/or other materials provided with the distribution. 40 * 3. This section intentionally left blank. 41 * 4. The name of the author may not be used to endorse or promote products 42 * derived from this software without specific prior written permission. 43 * 44 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 45 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 46 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 47 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 48 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 49 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 50 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 51 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 52 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 53 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 54 */ 55 /* 56 * Section 3 of the above license was updated in response to bug 6379571. 57 */ 58 59 /* 60 * xnf.c - GLDv3 network driver for domU. 61 */ 62 63 /* 64 * This driver uses four per-instance locks: 65 * 66 * xnf_gref_lock: 67 * 68 * Protects access to the grant reference list stored in 69 * xnf_gref_head. Grant references should be acquired and released 70 * using gref_get() and gref_put() respectively. 71 * 72 * xnf_schedlock: 73 * 74 * Protects: 75 * xnf_need_sched - used to record that a previous transmit attempt 76 * failed (and consequently it will be necessary to call 77 * mac_tx_update() when transmit resources are available). 78 * xnf_pending_multicast - the number of multicast requests that 79 * have been submitted to the backend for which we have not 80 * processed responses. 81 * 82 * xnf_txlock: 83 * 84 * Protects the transmit ring (xnf_tx_ring) and associated 85 * structures (notably xnf_tx_pkt_id and xnf_tx_pkt_id_head). 
86 * 87 * xnf_rxlock: 88 * 89 * Protects the receive ring (xnf_rx_ring) and associated 90 * structures (notably xnf_rx_pkt_info). 91 * 92 * If driver-global state that affects both the transmit and receive 93 * rings is manipulated, both xnf_txlock and xnf_rxlock should be 94 * held, in that order. 95 * 96 * xnf_schedlock is acquired both whilst holding xnf_txlock and 97 * without. It should always be acquired after xnf_txlock if both are 98 * held. 99 * 100 * Notes: 101 * - atomic_add_64() is used to manipulate counters where we require 102 * accuracy. For counters intended only for observation by humans, 103 * post increment/decrement are used instead. 104 */ 105 106 #include <sys/types.h> 107 #include <sys/errno.h> 108 #include <sys/param.h> 109 #include <sys/sysmacros.h> 110 #include <sys/systm.h> 111 #include <sys/stream.h> 112 #include <sys/strsubr.h> 113 #include <sys/strsun.h> 114 #include <sys/conf.h> 115 #include <sys/ddi.h> 116 #include <sys/devops.h> 117 #include <sys/sunddi.h> 118 #include <sys/sunndi.h> 119 #include <sys/dlpi.h> 120 #include <sys/ethernet.h> 121 #include <sys/strsun.h> 122 #include <sys/pattr.h> 123 #include <inet/ip.h> 124 #include <inet/ip_impl.h> 125 #include <sys/gld.h> 126 #include <sys/modctl.h> 127 #include <sys/mac_provider.h> 128 #include <sys/mac_ether.h> 129 #include <sys/bootinfo.h> 130 #include <sys/mach_mmu.h> 131 #ifdef XPV_HVM_DRIVER 132 #include <sys/xpv_support.h> 133 #include <sys/hypervisor.h> 134 #else 135 #include <sys/hypervisor.h> 136 #include <sys/evtchn_impl.h> 137 #include <sys/balloon_impl.h> 138 #endif 139 #include <xen/public/io/netif.h> 140 #include <sys/gnttab.h> 141 #include <xen/sys/xendev.h> 142 #include <sys/sdt.h> 143 #include <sys/note.h> 144 #include <sys/debug.h> 145 146 #include <io/xnf.h> 147 148 #if defined(DEBUG) || defined(__lint) 149 #define XNF_DEBUG 150 #endif 151 152 #ifdef XNF_DEBUG 153 int xnf_debug = 0; 154 xnf_t *xnf_debug_instance = NULL; 155 #endif 156 157 /* 158 * On a 32 bit PAE system physical and machine addresses are larger 159 * than 32 bits. ddi_btop() on such systems take an unsigned long 160 * argument, and so addresses above 4G are truncated before ddi_btop() 161 * gets to see them. To avoid this, code the shift operation here. 162 */ 163 #define xnf_btop(addr) ((addr) >> PAGESHIFT) 164 165 unsigned int xnf_max_tx_frags = 1; 166 167 /* 168 * Should we use the multicast control feature if the backend provides 169 * it? 170 */ 171 boolean_t xnf_multicast_control = B_TRUE; 172 173 /* 174 * Received packets below this size are copied to a new streams buffer 175 * rather than being desballoc'ed. 176 * 177 * This value is chosen to accommodate traffic where there are a large 178 * number of small packets. For data showing a typical distribution, 179 * see: 180 * 181 * Sinha07a: 182 * Rishi Sinha, Christos Papadopoulos, and John 183 * Heidemann. Internet Packet Size Distributions: Some 184 * Observations. Technical Report ISI-TR-2007-643, 185 * USC/Information Sciences Institute, May, 2007. Orignally 186 * released October 2005 as web page 187 * http://netweb.usc.edu/~sinha/pkt-sizes/. 188 * <http://www.isi.edu/~johnh/PAPERS/Sinha07a.html>. 
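 *
 * For illustration, the way this threshold is applied in
 * xnf_rx_collect() below is roughly:
 *
 *	if (len > xnf_rx_copy_limit)
 *		nbuf = xnf_buf_get(xnfp, KM_NOSLEEP, B_FALSE);
 *
 * i.e. only packets larger than the limit get a replacement buffer
 * (allowing the received buffer to be loaned upstream with
 * desballoc()); smaller packets are copied into a fresh allocb()ed
 * mblk and the original buffer is simply re-hung on the ring.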
189 */ 190 size_t xnf_rx_copy_limit = 64; 191 192 #define INVALID_GRANT_HANDLE ((grant_handle_t)-1) 193 #define INVALID_GRANT_REF ((grant_ref_t)-1) 194 #define INVALID_TX_ID ((uint16_t)-1) 195 196 #define TX_ID_TO_TXID(p, id) (&((p)->xnf_tx_pkt_id[(id)])) 197 #define TX_ID_VALID(i) (((i) != INVALID_TX_ID) && ((i) < NET_TX_RING_SIZE)) 198 199 /* Required system entry points */ 200 static int xnf_attach(dev_info_t *, ddi_attach_cmd_t); 201 static int xnf_detach(dev_info_t *, ddi_detach_cmd_t); 202 203 /* Required driver entry points for Nemo */ 204 static int xnf_start(void *); 205 static void xnf_stop(void *); 206 static int xnf_set_mac_addr(void *, const uint8_t *); 207 static int xnf_set_multicast(void *, boolean_t, const uint8_t *); 208 static int xnf_set_promiscuous(void *, boolean_t); 209 static mblk_t *xnf_send(void *, mblk_t *); 210 static uint_t xnf_intr(caddr_t); 211 static int xnf_stat(void *, uint_t, uint64_t *); 212 static boolean_t xnf_getcapab(void *, mac_capab_t, void *); 213 214 /* Driver private functions */ 215 static int xnf_alloc_dma_resources(xnf_t *); 216 static void xnf_release_dma_resources(xnf_t *); 217 static void xnf_release_mblks(xnf_t *); 218 219 static int xnf_buf_constructor(void *, void *, int); 220 static void xnf_buf_destructor(void *, void *); 221 static xnf_buf_t *xnf_buf_get(xnf_t *, int, boolean_t); 222 #pragma inline(xnf_buf_get) 223 static void xnf_buf_put(xnf_t *, xnf_buf_t *, boolean_t); 224 #pragma inline(xnf_buf_put) 225 static void xnf_buf_refresh(xnf_buf_t *); 226 #pragma inline(xnf_buf_refresh) 227 static void xnf_buf_recycle(xnf_buf_t *); 228 229 static int xnf_tx_buf_constructor(void *, void *, int); 230 static void xnf_tx_buf_destructor(void *, void *); 231 232 static grant_ref_t gref_get(xnf_t *); 233 #pragma inline(gref_get) 234 static void gref_put(xnf_t *, grant_ref_t); 235 #pragma inline(gref_put) 236 237 static xnf_txid_t *txid_get(xnf_t *); 238 #pragma inline(txid_get) 239 static void txid_put(xnf_t *, xnf_txid_t *); 240 #pragma inline(txid_put) 241 242 void xnf_send_driver_status(int, int); 243 static void xnf_rxbuf_hang(xnf_t *, xnf_buf_t *); 244 static int xnf_tx_clean_ring(xnf_t *); 245 static void oe_state_change(dev_info_t *, ddi_eventcookie_t, 246 void *, void *); 247 static boolean_t xnf_kstat_init(xnf_t *); 248 static void xnf_rx_collect(xnf_t *); 249 250 static mac_callbacks_t xnf_callbacks = { 251 MC_GETCAPAB, 252 xnf_stat, 253 xnf_start, 254 xnf_stop, 255 xnf_set_promiscuous, 256 xnf_set_multicast, 257 xnf_set_mac_addr, 258 xnf_send, 259 NULL, 260 xnf_getcapab 261 }; 262 263 /* DMA attributes for network ring buffer */ 264 static ddi_dma_attr_t ringbuf_dma_attr = { 265 DMA_ATTR_V0, /* version of this structure */ 266 0, /* lowest usable address */ 267 0xffffffffffffffffULL, /* highest usable address */ 268 0x7fffffff, /* maximum DMAable byte count */ 269 MMU_PAGESIZE, /* alignment in bytes */ 270 0x7ff, /* bitmap of burst sizes */ 271 1, /* minimum transfer */ 272 0xffffffffU, /* maximum transfer */ 273 0xffffffffffffffffULL, /* maximum segment length */ 274 1, /* maximum number of segments */ 275 1, /* granularity */ 276 0, /* flags (reserved) */ 277 }; 278 279 /* DMA attributes for transmit and receive data */ 280 static ddi_dma_attr_t buf_dma_attr = { 281 DMA_ATTR_V0, /* version of this structure */ 282 0, /* lowest usable address */ 283 0xffffffffffffffffULL, /* highest usable address */ 284 0x7fffffff, /* maximum DMAable byte count */ 285 MMU_PAGESIZE, /* alignment in bytes */ 286 0x7ff, /* bitmap of burst sizes */ 
287 1, /* minimum transfer */ 288 0xffffffffU, /* maximum transfer */ 289 0xffffffffffffffffULL, /* maximum segment length */ 290 1, /* maximum number of segments */ 291 1, /* granularity */ 292 0, /* flags (reserved) */ 293 }; 294 295 /* DMA access attributes for registers and descriptors */ 296 static ddi_device_acc_attr_t accattr = { 297 DDI_DEVICE_ATTR_V0, 298 DDI_STRUCTURE_LE_ACC, /* This is a little-endian device */ 299 DDI_STRICTORDER_ACC 300 }; 301 302 /* DMA access attributes for data: NOT to be byte swapped. */ 303 static ddi_device_acc_attr_t data_accattr = { 304 DDI_DEVICE_ATTR_V0, 305 DDI_NEVERSWAP_ACC, 306 DDI_STRICTORDER_ACC 307 }; 308 309 DDI_DEFINE_STREAM_OPS(xnf_dev_ops, nulldev, nulldev, xnf_attach, xnf_detach, 310 nodev, NULL, D_MP, NULL, ddi_quiesce_not_supported); 311 312 static struct modldrv xnf_modldrv = { 313 &mod_driverops, 314 "Virtual Ethernet driver", 315 &xnf_dev_ops 316 }; 317 318 static struct modlinkage modlinkage = { 319 MODREV_1, &xnf_modldrv, NULL 320 }; 321 322 int 323 _init(void) 324 { 325 int r; 326 327 mac_init_ops(&xnf_dev_ops, "xnf"); 328 r = mod_install(&modlinkage); 329 if (r != DDI_SUCCESS) 330 mac_fini_ops(&xnf_dev_ops); 331 332 return (r); 333 } 334 335 int 336 _fini(void) 337 { 338 return (EBUSY); /* XXPV should be removable */ 339 } 340 341 int 342 _info(struct modinfo *modinfop) 343 { 344 return (mod_info(&modlinkage, modinfop)); 345 } 346 347 /* 348 * Acquire a grant reference. 349 */ 350 static grant_ref_t 351 gref_get(xnf_t *xnfp) 352 { 353 grant_ref_t gref; 354 355 mutex_enter(&xnfp->xnf_gref_lock); 356 357 do { 358 gref = gnttab_claim_grant_reference(&xnfp->xnf_gref_head); 359 360 } while ((gref == INVALID_GRANT_REF) && 361 (gnttab_alloc_grant_references(16, &xnfp->xnf_gref_head) == 0)); 362 363 mutex_exit(&xnfp->xnf_gref_lock); 364 365 if (gref == INVALID_GRANT_REF) { 366 xnfp->xnf_stat_gref_failure++; 367 } else { 368 atomic_add_64(&xnfp->xnf_stat_gref_outstanding, 1); 369 if (xnfp->xnf_stat_gref_outstanding > xnfp->xnf_stat_gref_peak) 370 xnfp->xnf_stat_gref_peak = 371 xnfp->xnf_stat_gref_outstanding; 372 } 373 374 return (gref); 375 } 376 377 /* 378 * Release a grant reference. 379 */ 380 static void 381 gref_put(xnf_t *xnfp, grant_ref_t gref) 382 { 383 ASSERT(gref != INVALID_GRANT_REF); 384 385 mutex_enter(&xnfp->xnf_gref_lock); 386 gnttab_release_grant_reference(&xnfp->xnf_gref_head, gref); 387 mutex_exit(&xnfp->xnf_gref_lock); 388 389 atomic_add_64(&xnfp->xnf_stat_gref_outstanding, -1); 390 } 391 392 /* 393 * Acquire a transmit id. 394 */ 395 static xnf_txid_t * 396 txid_get(xnf_t *xnfp) 397 { 398 xnf_txid_t *tidp; 399 400 ASSERT(MUTEX_HELD(&xnfp->xnf_txlock)); 401 402 if (xnfp->xnf_tx_pkt_id_head == INVALID_TX_ID) 403 return (NULL); 404 405 ASSERT(TX_ID_VALID(xnfp->xnf_tx_pkt_id_head)); 406 407 tidp = TX_ID_TO_TXID(xnfp, xnfp->xnf_tx_pkt_id_head); 408 xnfp->xnf_tx_pkt_id_head = tidp->next; 409 tidp->next = INVALID_TX_ID; 410 411 ASSERT(tidp->txbuf == NULL); 412 413 return (tidp); 414 } 415 416 /* 417 * Release a transmit id. 418 */ 419 static void 420 txid_put(xnf_t *xnfp, xnf_txid_t *tidp) 421 { 422 ASSERT(MUTEX_HELD(&xnfp->xnf_txlock)); 423 ASSERT(TX_ID_VALID(tidp->id)); 424 ASSERT(tidp->next == INVALID_TX_ID); 425 426 tidp->txbuf = NULL; 427 tidp->next = xnfp->xnf_tx_pkt_id_head; 428 xnfp->xnf_tx_pkt_id_head = tidp->id; 429 } 430 431 /* 432 * Get `wanted' slots in the transmit ring, waiting for at least that 433 * number if `wait' is B_TRUE. Force the ring to be cleaned by setting 434 * `wanted' to zero. 
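 *
 * For illustration, the call patterns used elsewhere in this file
 * are:
 *
 *	tx_slots_get(xnfp, 2, B_TRUE);	wait for the two slots needed
 *					by a multicast request
 *	tx_slots_get(xnfp, 1, B_FALSE);	opportunistic check before
 *					pushing data packets
 *	tx_slots_get(xnfp, 0, B_FALSE);	force a ring clean from the
 *					interrupt handler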
435 * 436 * Return the number of slots available. 437 */ 438 static int 439 tx_slots_get(xnf_t *xnfp, int wanted, boolean_t wait) 440 { 441 int slotsfree; 442 boolean_t forced_clean = (wanted == 0); 443 444 ASSERT(MUTEX_HELD(&xnfp->xnf_txlock)); 445 446 /* LINTED: constant in conditional context */ 447 while (B_TRUE) { 448 slotsfree = RING_FREE_REQUESTS(&xnfp->xnf_tx_ring); 449 450 if ((slotsfree < wanted) || forced_clean) 451 slotsfree = xnf_tx_clean_ring(xnfp); 452 453 /* 454 * If there are more than we need free, tell other 455 * people to come looking again. We hold txlock, so we 456 * are able to take our slots before anyone else runs. 457 */ 458 if (slotsfree > wanted) 459 cv_broadcast(&xnfp->xnf_cv_tx_slots); 460 461 if (slotsfree >= wanted) 462 break; 463 464 if (!wait) 465 break; 466 467 cv_wait(&xnfp->xnf_cv_tx_slots, &xnfp->xnf_txlock); 468 } 469 470 ASSERT(slotsfree <= RING_SIZE(&(xnfp->xnf_tx_ring))); 471 472 return (slotsfree); 473 } 474 475 static int 476 xnf_setup_rings(xnf_t *xnfp) 477 { 478 domid_t oeid; 479 struct xenbus_device *xsd; 480 RING_IDX i; 481 int err; 482 xnf_txid_t *tidp; 483 xnf_buf_t **bdescp; 484 485 oeid = xvdi_get_oeid(xnfp->xnf_devinfo); 486 xsd = xvdi_get_xsd(xnfp->xnf_devinfo); 487 488 if (xnfp->xnf_tx_ring_ref != INVALID_GRANT_REF) 489 gnttab_end_foreign_access(xnfp->xnf_tx_ring_ref, 0, 0); 490 491 err = gnttab_grant_foreign_access(oeid, 492 xnf_btop(pa_to_ma(xnfp->xnf_tx_ring_phys_addr)), 0); 493 if (err <= 0) { 494 err = -err; 495 xenbus_dev_error(xsd, err, "granting access to tx ring page"); 496 goto out; 497 } 498 xnfp->xnf_tx_ring_ref = (grant_ref_t)err; 499 500 if (xnfp->xnf_rx_ring_ref != INVALID_GRANT_REF) 501 gnttab_end_foreign_access(xnfp->xnf_rx_ring_ref, 0, 0); 502 503 err = gnttab_grant_foreign_access(oeid, 504 xnf_btop(pa_to_ma(xnfp->xnf_rx_ring_phys_addr)), 0); 505 if (err <= 0) { 506 err = -err; 507 xenbus_dev_error(xsd, err, "granting access to rx ring page"); 508 goto out; 509 } 510 xnfp->xnf_rx_ring_ref = (grant_ref_t)err; 511 512 mutex_enter(&xnfp->xnf_txlock); 513 514 /* 515 * Setup/cleanup the TX ring. Note that this can lose packets 516 * after a resume, but we expect to stagger on. 517 */ 518 xnfp->xnf_tx_pkt_id_head = INVALID_TX_ID; /* I.e. emtpy list. */ 519 for (i = 0, tidp = &xnfp->xnf_tx_pkt_id[0]; 520 i < NET_TX_RING_SIZE; 521 i++, tidp++) { 522 xnf_txbuf_t *txp; 523 524 tidp->id = i; 525 526 txp = tidp->txbuf; 527 if (txp == NULL) { 528 tidp->next = INVALID_TX_ID; /* Appease txid_put(). */ 529 txid_put(xnfp, tidp); 530 continue; 531 } 532 533 ASSERT(txp->tx_txreq.gref != INVALID_GRANT_REF); 534 ASSERT(txp->tx_mp != NULL); 535 536 switch (txp->tx_type) { 537 case TX_DATA: 538 VERIFY(gnttab_query_foreign_access(txp->tx_txreq.gref) 539 == 0); 540 541 if (txp->tx_bdesc == NULL) { 542 (void) gnttab_end_foreign_access_ref( 543 txp->tx_txreq.gref, 1); 544 gref_put(xnfp, txp->tx_txreq.gref); 545 (void) ddi_dma_unbind_handle( 546 txp->tx_dma_handle); 547 } else { 548 xnf_buf_put(xnfp, txp->tx_bdesc, B_TRUE); 549 } 550 551 freemsg(txp->tx_mp); 552 txid_put(xnfp, tidp); 553 kmem_cache_free(xnfp->xnf_tx_buf_cache, txp); 554 555 break; 556 557 case TX_MCAST_REQ: 558 txp->tx_type = TX_MCAST_RSP; 559 txp->tx_status = NETIF_RSP_DROPPED; 560 cv_broadcast(&xnfp->xnf_cv_multicast); 561 562 /* 563 * The request consumed two slots in the ring, 564 * yet only a single xnf_txid_t is used. Step 565 * over the empty slot. 
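 *
 * For reference, the layout built by xnf_set_multicast() is
 * roughly:
 *
 *	slot n:		netif_tx_request_t, NETTXF_extra_info set
 *	slot n + 1:	struct netif_extra_info carrying the
 *			multicast address and an ADD/DEL type
 *
 * which is why the loop index is stepped past the second slot
 * below.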
566 */ 567 i++; 568 ASSERT(i < NET_TX_RING_SIZE); 569 570 break; 571 572 case TX_MCAST_RSP: 573 break; 574 } 575 } 576 577 /* LINTED: constant in conditional context */ 578 SHARED_RING_INIT(xnfp->xnf_tx_ring.sring); 579 /* LINTED: constant in conditional context */ 580 FRONT_RING_INIT(&xnfp->xnf_tx_ring, 581 xnfp->xnf_tx_ring.sring, PAGESIZE); 582 583 mutex_exit(&xnfp->xnf_txlock); 584 585 mutex_enter(&xnfp->xnf_rxlock); 586 587 /* 588 * Clean out any buffers currently posted to the receive ring 589 * before we reset it. 590 */ 591 for (i = 0, bdescp = &xnfp->xnf_rx_pkt_info[0]; 592 i < NET_RX_RING_SIZE; 593 i++, bdescp++) { 594 if (*bdescp != NULL) { 595 xnf_buf_put(xnfp, *bdescp, B_FALSE); 596 *bdescp = NULL; 597 } 598 } 599 600 /* LINTED: constant in conditional context */ 601 SHARED_RING_INIT(xnfp->xnf_rx_ring.sring); 602 /* LINTED: constant in conditional context */ 603 FRONT_RING_INIT(&xnfp->xnf_rx_ring, 604 xnfp->xnf_rx_ring.sring, PAGESIZE); 605 606 /* 607 * Fill the ring with buffers. 608 */ 609 for (i = 0; i < NET_RX_RING_SIZE; i++) { 610 xnf_buf_t *bdesc; 611 612 bdesc = xnf_buf_get(xnfp, KM_SLEEP, B_FALSE); 613 VERIFY(bdesc != NULL); 614 xnf_rxbuf_hang(xnfp, bdesc); 615 } 616 617 /* LINTED: constant in conditional context */ 618 RING_PUSH_REQUESTS(&xnfp->xnf_rx_ring); 619 620 mutex_exit(&xnfp->xnf_rxlock); 621 622 return (0); 623 624 out: 625 if (xnfp->xnf_tx_ring_ref != INVALID_GRANT_REF) 626 gnttab_end_foreign_access(xnfp->xnf_tx_ring_ref, 0, 0); 627 xnfp->xnf_tx_ring_ref = INVALID_GRANT_REF; 628 629 if (xnfp->xnf_rx_ring_ref != INVALID_GRANT_REF) 630 gnttab_end_foreign_access(xnfp->xnf_rx_ring_ref, 0, 0); 631 xnfp->xnf_rx_ring_ref = INVALID_GRANT_REF; 632 633 return (err); 634 } 635 636 /* 637 * Connect driver to back end, called to set up communication with 638 * back end driver both initially and on resume after restore/migrate. 
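 *
 * For reference, the xenstore keys written in the transaction
 * below are roughly:
 *
 *	tx-ring-ref			<xnf_tx_ring_ref>
 *	rx-ring-ref			<xnf_rx_ring_ref>
 *	event-channel			<xnf_evtchn>
 *	feature-rx-notify		1
 *	request-rx-copy			1
 *	request-multicast-control	1 (only if the backend offers it)
 *
 * after which the device is switched to XenbusStateConnected.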
639 */ 640 void 641 xnf_be_connect(xnf_t *xnfp) 642 { 643 const char *message; 644 xenbus_transaction_t xbt; 645 struct xenbus_device *xsd; 646 char *xsname; 647 int err; 648 649 ASSERT(!xnfp->xnf_connected); 650 651 xsd = xvdi_get_xsd(xnfp->xnf_devinfo); 652 xsname = xvdi_get_xsname(xnfp->xnf_devinfo); 653 654 err = xnf_setup_rings(xnfp); 655 if (err != 0) { 656 cmn_err(CE_WARN, "failed to set up tx/rx rings"); 657 xenbus_dev_error(xsd, err, "setting up ring"); 658 return; 659 } 660 661 again: 662 err = xenbus_transaction_start(&xbt); 663 if (err != 0) { 664 xenbus_dev_error(xsd, EIO, "starting transaction"); 665 return; 666 } 667 668 err = xenbus_printf(xbt, xsname, "tx-ring-ref", "%u", 669 xnfp->xnf_tx_ring_ref); 670 if (err != 0) { 671 message = "writing tx ring-ref"; 672 goto abort_transaction; 673 } 674 675 err = xenbus_printf(xbt, xsname, "rx-ring-ref", "%u", 676 xnfp->xnf_rx_ring_ref); 677 if (err != 0) { 678 message = "writing rx ring-ref"; 679 goto abort_transaction; 680 } 681 682 err = xenbus_printf(xbt, xsname, "event-channel", "%u", 683 xnfp->xnf_evtchn); 684 if (err != 0) { 685 message = "writing event-channel"; 686 goto abort_transaction; 687 } 688 689 err = xenbus_printf(xbt, xsname, "feature-rx-notify", "%d", 1); 690 if (err != 0) { 691 message = "writing feature-rx-notify"; 692 goto abort_transaction; 693 } 694 695 err = xenbus_printf(xbt, xsname, "request-rx-copy", "%d", 1); 696 if (err != 0) { 697 message = "writing request-rx-copy"; 698 goto abort_transaction; 699 } 700 701 if (xnfp->xnf_be_mcast_control) { 702 err = xenbus_printf(xbt, xsname, "request-multicast-control", 703 "%d", 1); 704 if (err != 0) { 705 message = "writing request-multicast-control"; 706 goto abort_transaction; 707 } 708 } 709 710 err = xvdi_switch_state(xnfp->xnf_devinfo, xbt, XenbusStateConnected); 711 if (err != 0) { 712 message = "switching state to XenbusStateConnected"; 713 goto abort_transaction; 714 } 715 716 err = xenbus_transaction_end(xbt, 0); 717 if (err != 0) { 718 if (err == EAGAIN) 719 goto again; 720 xenbus_dev_error(xsd, err, "completing transaction"); 721 } 722 723 return; 724 725 abort_transaction: 726 (void) xenbus_transaction_end(xbt, 1); 727 xenbus_dev_error(xsd, err, "%s", message); 728 } 729 730 /* 731 * Read configuration information from xenstore. 732 */ 733 void 734 xnf_read_config(xnf_t *xnfp) 735 { 736 int err, be_cap; 737 char mac[ETHERADDRL * 3]; 738 char *oename = xvdi_get_oename(xnfp->xnf_devinfo); 739 740 err = xenbus_scanf(XBT_NULL, oename, "mac", 741 "%s", (char *)&mac[0]); 742 if (err != 0) { 743 /* 744 * bad: we're supposed to be set up with a proper mac 745 * addr. at this point 746 */ 747 cmn_err(CE_WARN, "%s%d: no mac address", 748 ddi_driver_name(xnfp->xnf_devinfo), 749 ddi_get_instance(xnfp->xnf_devinfo)); 750 return; 751 } 752 if (ether_aton(mac, xnfp->xnf_mac_addr) != ETHERADDRL) { 753 err = ENOENT; 754 xenbus_dev_error(xvdi_get_xsd(xnfp->xnf_devinfo), ENOENT, 755 "parsing %s/mac", xvdi_get_xsname(xnfp->xnf_devinfo)); 756 return; 757 } 758 759 err = xenbus_scanf(XBT_NULL, oename, 760 "feature-rx-copy", "%d", &be_cap); 761 /* 762 * If we fail to read the store we assume that the key is 763 * absent, implying an older domain at the far end. Older 764 * domains cannot do HV copy. 
765 */ 766 if (err != 0) 767 be_cap = 0; 768 xnfp->xnf_be_rx_copy = (be_cap != 0); 769 770 err = xenbus_scanf(XBT_NULL, oename, 771 "feature-multicast-control", "%d", &be_cap); 772 /* 773 * If we fail to read the store we assume that the key is 774 * absent, implying an older domain at the far end. Older 775 * domains do not support multicast control. 776 */ 777 if (err != 0) 778 be_cap = 0; 779 xnfp->xnf_be_mcast_control = (be_cap != 0) && xnf_multicast_control; 780 } 781 782 /* 783 * attach(9E) -- Attach a device to the system 784 */ 785 static int 786 xnf_attach(dev_info_t *devinfo, ddi_attach_cmd_t cmd) 787 { 788 mac_register_t *macp; 789 xnf_t *xnfp; 790 int err; 791 char cachename[32]; 792 793 #ifdef XNF_DEBUG 794 if (xnf_debug & XNF_DEBUG_DDI) 795 printf("xnf%d: attach(0x%p)\n", ddi_get_instance(devinfo), 796 (void *)devinfo); 797 #endif 798 799 switch (cmd) { 800 case DDI_RESUME: 801 xnfp = ddi_get_driver_private(devinfo); 802 xnfp->xnf_gen++; 803 804 (void) xvdi_resume(devinfo); 805 (void) xvdi_alloc_evtchn(devinfo); 806 xnfp->xnf_evtchn = xvdi_get_evtchn(devinfo); 807 #ifdef XPV_HVM_DRIVER 808 ec_bind_evtchn_to_handler(xnfp->xnf_evtchn, IPL_VIF, xnf_intr, 809 xnfp); 810 #else 811 (void) ddi_add_intr(devinfo, 0, NULL, NULL, xnf_intr, 812 (caddr_t)xnfp); 813 #endif 814 return (DDI_SUCCESS); 815 816 case DDI_ATTACH: 817 break; 818 819 default: 820 return (DDI_FAILURE); 821 } 822 823 /* 824 * Allocate gld_mac_info_t and xnf_instance structures 825 */ 826 macp = mac_alloc(MAC_VERSION); 827 if (macp == NULL) 828 return (DDI_FAILURE); 829 xnfp = kmem_zalloc(sizeof (*xnfp), KM_SLEEP); 830 831 macp->m_dip = devinfo; 832 macp->m_driver = xnfp; 833 xnfp->xnf_devinfo = devinfo; 834 835 macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER; 836 macp->m_src_addr = xnfp->xnf_mac_addr; 837 macp->m_callbacks = &xnf_callbacks; 838 macp->m_min_sdu = 0; 839 macp->m_max_sdu = XNF_MAXPKT; 840 841 xnfp->xnf_running = B_FALSE; 842 xnfp->xnf_connected = B_FALSE; 843 xnfp->xnf_be_rx_copy = B_FALSE; 844 xnfp->xnf_be_mcast_control = B_FALSE; 845 xnfp->xnf_need_sched = B_FALSE; 846 847 xnfp->xnf_rx_head = NULL; 848 xnfp->xnf_rx_tail = NULL; 849 xnfp->xnf_rx_new_buffers_posted = B_FALSE; 850 851 #ifdef XPV_HVM_DRIVER 852 /* 853 * Report our version to dom0. 854 */ 855 if (xenbus_printf(XBT_NULL, "guest/xnf", "version", "%d", 856 HVMPV_XNF_VERS)) 857 cmn_err(CE_WARN, "xnf: couldn't write version\n"); 858 #endif 859 860 /* 861 * Get the iblock cookie with which to initialize the mutexes. 
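 *
 * (The intent, as we understand it, is that initializing the
 * mutexes with this cookie makes them safe to acquire from the
 * xnf_intr() interrupt handler.)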
862 */ 863 if (ddi_get_iblock_cookie(devinfo, 0, &xnfp->xnf_icookie) 864 != DDI_SUCCESS) 865 goto failure; 866 867 mutex_init(&xnfp->xnf_txlock, 868 NULL, MUTEX_DRIVER, xnfp->xnf_icookie); 869 mutex_init(&xnfp->xnf_rxlock, 870 NULL, MUTEX_DRIVER, xnfp->xnf_icookie); 871 mutex_init(&xnfp->xnf_schedlock, 872 NULL, MUTEX_DRIVER, xnfp->xnf_icookie); 873 mutex_init(&xnfp->xnf_gref_lock, 874 NULL, MUTEX_DRIVER, xnfp->xnf_icookie); 875 876 cv_init(&xnfp->xnf_cv_state, NULL, CV_DEFAULT, NULL); 877 cv_init(&xnfp->xnf_cv_multicast, NULL, CV_DEFAULT, NULL); 878 cv_init(&xnfp->xnf_cv_tx_slots, NULL, CV_DEFAULT, NULL); 879 880 (void) sprintf(cachename, "xnf_buf_cache_%d", 881 ddi_get_instance(devinfo)); 882 xnfp->xnf_buf_cache = kmem_cache_create(cachename, 883 sizeof (xnf_buf_t), 0, 884 xnf_buf_constructor, xnf_buf_destructor, 885 NULL, xnfp, NULL, 0); 886 if (xnfp->xnf_buf_cache == NULL) 887 goto failure_0; 888 889 (void) sprintf(cachename, "xnf_tx_buf_cache_%d", 890 ddi_get_instance(devinfo)); 891 xnfp->xnf_tx_buf_cache = kmem_cache_create(cachename, 892 sizeof (xnf_txbuf_t), 0, 893 xnf_tx_buf_constructor, xnf_tx_buf_destructor, 894 NULL, xnfp, NULL, 0); 895 if (xnfp->xnf_tx_buf_cache == NULL) 896 goto failure_1; 897 898 xnfp->xnf_gref_head = INVALID_GRANT_REF; 899 900 if (xnf_alloc_dma_resources(xnfp) == DDI_FAILURE) { 901 cmn_err(CE_WARN, "xnf%d: failed to allocate and initialize " 902 "driver data structures", 903 ddi_get_instance(xnfp->xnf_devinfo)); 904 goto failure_2; 905 } 906 907 xnfp->xnf_rx_ring.sring->rsp_event = 908 xnfp->xnf_tx_ring.sring->rsp_event = 1; 909 910 xnfp->xnf_tx_ring_ref = INVALID_GRANT_REF; 911 xnfp->xnf_rx_ring_ref = INVALID_GRANT_REF; 912 913 /* set driver private pointer now */ 914 ddi_set_driver_private(devinfo, xnfp); 915 916 if (!xnf_kstat_init(xnfp)) 917 goto failure_3; 918 919 /* 920 * Allocate an event channel, add the interrupt handler and 921 * bind it to the event channel. 922 */ 923 (void) xvdi_alloc_evtchn(devinfo); 924 xnfp->xnf_evtchn = xvdi_get_evtchn(devinfo); 925 #ifdef XPV_HVM_DRIVER 926 ec_bind_evtchn_to_handler(xnfp->xnf_evtchn, IPL_VIF, xnf_intr, xnfp); 927 #else 928 (void) ddi_add_intr(devinfo, 0, NULL, NULL, xnf_intr, (caddr_t)xnfp); 929 #endif 930 931 err = mac_register(macp, &xnfp->xnf_mh); 932 mac_free(macp); 933 macp = NULL; 934 if (err != 0) 935 goto failure_4; 936 937 if (xvdi_add_event_handler(devinfo, XS_OE_STATE, oe_state_change, NULL) 938 != DDI_SUCCESS) 939 goto failure_5; 940 941 #ifdef XPV_HVM_DRIVER 942 /* 943 * In the HVM case, this driver essentially replaces a driver for 944 * a 'real' PCI NIC. Without the "model" property set to 945 * "Ethernet controller", like the PCI code does, netbooting does 946 * not work correctly, as strplumb_get_netdev_path() will not find 947 * this interface. 
948 */ 949 (void) ndi_prop_update_string(DDI_DEV_T_NONE, devinfo, "model", 950 "Ethernet controller"); 951 #endif 952 953 #ifdef XNF_DEBUG 954 if (xnf_debug_instance == NULL) 955 xnf_debug_instance = xnfp; 956 #endif 957 958 return (DDI_SUCCESS); 959 960 failure_5: 961 (void) mac_unregister(xnfp->xnf_mh); 962 963 failure_4: 964 #ifdef XPV_HVM_DRIVER 965 ec_unbind_evtchn(xnfp->xnf_evtchn); 966 xvdi_free_evtchn(devinfo); 967 #else 968 ddi_remove_intr(devinfo, 0, xnfp->xnf_icookie); 969 #endif 970 xnfp->xnf_evtchn = INVALID_EVTCHN; 971 kstat_delete(xnfp->xnf_kstat_aux); 972 973 failure_3: 974 xnf_release_dma_resources(xnfp); 975 976 failure_2: 977 kmem_cache_destroy(xnfp->xnf_tx_buf_cache); 978 979 failure_1: 980 kmem_cache_destroy(xnfp->xnf_buf_cache); 981 982 failure_0: 983 cv_destroy(&xnfp->xnf_cv_tx_slots); 984 cv_destroy(&xnfp->xnf_cv_multicast); 985 cv_destroy(&xnfp->xnf_cv_state); 986 987 mutex_destroy(&xnfp->xnf_gref_lock); 988 mutex_destroy(&xnfp->xnf_schedlock); 989 mutex_destroy(&xnfp->xnf_rxlock); 990 mutex_destroy(&xnfp->xnf_txlock); 991 992 failure: 993 kmem_free(xnfp, sizeof (*xnfp)); 994 if (macp != NULL) 995 mac_free(macp); 996 997 return (DDI_FAILURE); 998 } 999 1000 /* detach(9E) -- Detach a device from the system */ 1001 static int 1002 xnf_detach(dev_info_t *devinfo, ddi_detach_cmd_t cmd) 1003 { 1004 xnf_t *xnfp; /* Our private device info */ 1005 1006 #ifdef XNF_DEBUG 1007 if (xnf_debug & XNF_DEBUG_DDI) 1008 printf("xnf_detach(0x%p)\n", (void *)devinfo); 1009 #endif 1010 1011 xnfp = ddi_get_driver_private(devinfo); 1012 1013 switch (cmd) { 1014 case DDI_SUSPEND: 1015 #ifdef XPV_HVM_DRIVER 1016 ec_unbind_evtchn(xnfp->xnf_evtchn); 1017 xvdi_free_evtchn(devinfo); 1018 #else 1019 ddi_remove_intr(devinfo, 0, xnfp->xnf_icookie); 1020 #endif 1021 1022 xvdi_suspend(devinfo); 1023 1024 mutex_enter(&xnfp->xnf_rxlock); 1025 mutex_enter(&xnfp->xnf_txlock); 1026 1027 xnfp->xnf_evtchn = INVALID_EVTCHN; 1028 xnfp->xnf_connected = B_FALSE; 1029 mutex_exit(&xnfp->xnf_txlock); 1030 mutex_exit(&xnfp->xnf_rxlock); 1031 1032 /* claim link to be down after disconnect */ 1033 mac_link_update(xnfp->xnf_mh, LINK_STATE_DOWN); 1034 return (DDI_SUCCESS); 1035 1036 case DDI_DETACH: 1037 break; 1038 1039 default: 1040 return (DDI_FAILURE); 1041 } 1042 1043 if (xnfp->xnf_connected) 1044 return (DDI_FAILURE); 1045 1046 /* 1047 * Cannot detach if we have xnf_buf_t outstanding. 
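 *
 * (Such buffers will typically have been loaned upstream via
 * desballoc() in xnf_rx_collect() and are presumably only returned
 * through xnf_buf_recycle(), so the kmem caches cannot yet be torn
 * down safely.)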
1048 */ 1049 if (xnfp->xnf_stat_buf_allocated > 0) 1050 return (DDI_FAILURE); 1051 1052 if (mac_unregister(xnfp->xnf_mh) != 0) 1053 return (DDI_FAILURE); 1054 1055 kstat_delete(xnfp->xnf_kstat_aux); 1056 1057 /* Stop the receiver */ 1058 xnf_stop(xnfp); 1059 1060 xvdi_remove_event_handler(devinfo, XS_OE_STATE); 1061 1062 /* Remove the interrupt */ 1063 #ifdef XPV_HVM_DRIVER 1064 ec_unbind_evtchn(xnfp->xnf_evtchn); 1065 xvdi_free_evtchn(devinfo); 1066 #else 1067 ddi_remove_intr(devinfo, 0, xnfp->xnf_icookie); 1068 #endif 1069 1070 /* Release any pending xmit mblks */ 1071 xnf_release_mblks(xnfp); 1072 1073 /* Release all DMA resources */ 1074 xnf_release_dma_resources(xnfp); 1075 1076 cv_destroy(&xnfp->xnf_cv_tx_slots); 1077 cv_destroy(&xnfp->xnf_cv_multicast); 1078 cv_destroy(&xnfp->xnf_cv_state); 1079 1080 kmem_cache_destroy(xnfp->xnf_tx_buf_cache); 1081 kmem_cache_destroy(xnfp->xnf_buf_cache); 1082 1083 mutex_destroy(&xnfp->xnf_gref_lock); 1084 mutex_destroy(&xnfp->xnf_schedlock); 1085 mutex_destroy(&xnfp->xnf_rxlock); 1086 mutex_destroy(&xnfp->xnf_txlock); 1087 1088 kmem_free(xnfp, sizeof (*xnfp)); 1089 1090 return (DDI_SUCCESS); 1091 } 1092 1093 /* 1094 * xnf_set_mac_addr() -- set the physical network address on the board. 1095 */ 1096 static int 1097 xnf_set_mac_addr(void *arg, const uint8_t *macaddr) 1098 { 1099 _NOTE(ARGUNUSED(arg, macaddr)); 1100 1101 /* 1102 * We can't set our macaddr. 1103 */ 1104 return (ENOTSUP); 1105 } 1106 1107 /* 1108 * xnf_set_multicast() -- set (enable) or disable a multicast address. 1109 * 1110 * Program the hardware to enable/disable the multicast address 1111 * in "mca". Enable if "add" is true, disable if false. 1112 */ 1113 static int 1114 xnf_set_multicast(void *arg, boolean_t add, const uint8_t *mca) 1115 { 1116 xnf_t *xnfp = arg; 1117 xnf_txbuf_t *txp; 1118 int n_slots; 1119 RING_IDX slot; 1120 xnf_txid_t *tidp; 1121 netif_tx_request_t *txrp; 1122 struct netif_extra_info *erp; 1123 boolean_t notify, result; 1124 1125 /* 1126 * If the backend does not support multicast control then we 1127 * must assume that the right packets will just arrive. 1128 */ 1129 if (!xnfp->xnf_be_mcast_control) 1130 return (0); 1131 1132 txp = kmem_cache_alloc(xnfp->xnf_tx_buf_cache, KM_SLEEP); 1133 if (txp == NULL) 1134 return (1); 1135 1136 mutex_enter(&xnfp->xnf_txlock); 1137 1138 /* 1139 * If we're not yet connected then claim success. This is 1140 * acceptable because we refresh the entire set of multicast 1141 * addresses when we get connected. 1142 * 1143 * We can't wait around here because the MAC layer expects 1144 * this to be a non-blocking operation - waiting ends up 1145 * causing a deadlock during resume. 1146 */ 1147 if (!xnfp->xnf_connected) { 1148 mutex_exit(&xnfp->xnf_txlock); 1149 return (0); 1150 } 1151 1152 /* 1153 * 1. Acquire two slots in the ring. 1154 * 2. Fill in the slots. 1155 * 3. Request notification when the operation is done. 1156 * 4. Kick the peer. 1157 * 5. Wait for the response via xnf_tx_clean_ring(). 1158 */ 1159 1160 n_slots = tx_slots_get(xnfp, 2, B_TRUE); 1161 ASSERT(n_slots >= 2); 1162 1163 slot = xnfp->xnf_tx_ring.req_prod_pvt; 1164 tidp = txid_get(xnfp); 1165 VERIFY(tidp != NULL); 1166 1167 txp->tx_type = TX_MCAST_REQ; 1168 txp->tx_slot = slot; 1169 1170 txrp = RING_GET_REQUEST(&xnfp->xnf_tx_ring, slot); 1171 erp = (struct netif_extra_info *) 1172 RING_GET_REQUEST(&xnfp->xnf_tx_ring, slot + 1); 1173 1174 txrp->gref = 0; 1175 txrp->size = 0; 1176 txrp->offset = 0; 1177 /* Set tx_txreq.id to appease xnf_tx_clean_ring(). 
*/ 1178 txrp->id = txp->tx_txreq.id = tidp->id; 1179 txrp->flags = NETTXF_extra_info; 1180 1181 erp->type = add ? XEN_NETIF_EXTRA_TYPE_MCAST_ADD : 1182 XEN_NETIF_EXTRA_TYPE_MCAST_DEL; 1183 bcopy((void *)mca, &erp->u.mcast.addr, ETHERADDRL); 1184 1185 tidp->txbuf = txp; 1186 1187 xnfp->xnf_tx_ring.req_prod_pvt = slot + 2; 1188 1189 mutex_enter(&xnfp->xnf_schedlock); 1190 xnfp->xnf_pending_multicast++; 1191 mutex_exit(&xnfp->xnf_schedlock); 1192 1193 /* LINTED: constant in conditional context */ 1194 RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&xnfp->xnf_tx_ring, 1195 notify); 1196 if (notify) 1197 ec_notify_via_evtchn(xnfp->xnf_evtchn); 1198 1199 while (txp->tx_type == TX_MCAST_REQ) 1200 cv_wait(&xnfp->xnf_cv_multicast, 1201 &xnfp->xnf_txlock); 1202 1203 ASSERT(txp->tx_type == TX_MCAST_RSP); 1204 1205 mutex_enter(&xnfp->xnf_schedlock); 1206 xnfp->xnf_pending_multicast--; 1207 mutex_exit(&xnfp->xnf_schedlock); 1208 1209 result = (txp->tx_status == NETIF_RSP_OKAY); 1210 1211 txid_put(xnfp, tidp); 1212 1213 mutex_exit(&xnfp->xnf_txlock); 1214 1215 kmem_cache_free(xnfp->xnf_tx_buf_cache, txp); 1216 1217 return (result ? 0 : 1); 1218 } 1219 1220 /* 1221 * xnf_set_promiscuous() -- set or reset promiscuous mode on the board 1222 * 1223 * Program the hardware to enable/disable promiscuous mode. 1224 */ 1225 static int 1226 xnf_set_promiscuous(void *arg, boolean_t on) 1227 { 1228 _NOTE(ARGUNUSED(arg, on)); 1229 1230 /* 1231 * We can't really do this, but we pretend that we can in 1232 * order that snoop will work. 1233 */ 1234 return (0); 1235 } 1236 1237 /* 1238 * Clean buffers that we have responses for from the transmit ring. 1239 */ 1240 static int 1241 xnf_tx_clean_ring(xnf_t *xnfp) 1242 { 1243 boolean_t work_to_do; 1244 1245 ASSERT(MUTEX_HELD(&xnfp->xnf_txlock)); 1246 1247 loop: 1248 while (RING_HAS_UNCONSUMED_RESPONSES(&xnfp->xnf_tx_ring)) { 1249 RING_IDX cons, prod, i; 1250 1251 cons = xnfp->xnf_tx_ring.rsp_cons; 1252 prod = xnfp->xnf_tx_ring.sring->rsp_prod; 1253 membar_consumer(); 1254 /* 1255 * Clean tx requests from ring that we have responses 1256 * for. 
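 *
 * This follows the usual shared-ring consumer pattern: read
 * rsp_prod once (membar_consumer() orders that read before the
 * reads of the responses themselves), process cons..prod-1, then
 * publish the new rsp_cons.  RING_FINAL_CHECK_FOR_RESPONSES()
 * afterwards catches responses that arrived while we were working.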
1257 */ 1258 DTRACE_PROBE2(xnf_tx_clean_range, int, cons, int, prod); 1259 for (i = cons; i != prod; i++) { 1260 netif_tx_response_t *trp; 1261 xnf_txid_t *tidp; 1262 xnf_txbuf_t *txp; 1263 1264 trp = RING_GET_RESPONSE(&xnfp->xnf_tx_ring, i); 1265 ASSERT(TX_ID_VALID(trp->id)); 1266 1267 tidp = TX_ID_TO_TXID(xnfp, trp->id); 1268 ASSERT(tidp->id == trp->id); 1269 ASSERT(tidp->next == INVALID_TX_ID); 1270 1271 txp = tidp->txbuf; 1272 ASSERT(txp != NULL); 1273 ASSERT(txp->tx_txreq.id == trp->id); 1274 1275 switch (txp->tx_type) { 1276 case TX_DATA: 1277 if (gnttab_query_foreign_access( 1278 txp->tx_txreq.gref) != 0) 1279 cmn_err(CE_PANIC, 1280 "tx grant %d still in use by " 1281 "backend domain", 1282 txp->tx_txreq.gref); 1283 1284 if (txp->tx_bdesc == NULL) { 1285 (void) gnttab_end_foreign_access_ref( 1286 txp->tx_txreq.gref, 1); 1287 gref_put(xnfp, txp->tx_txreq.gref); 1288 (void) ddi_dma_unbind_handle( 1289 txp->tx_dma_handle); 1290 } else { 1291 xnf_buf_put(xnfp, txp->tx_bdesc, 1292 B_TRUE); 1293 } 1294 1295 freemsg(txp->tx_mp); 1296 txid_put(xnfp, tidp); 1297 kmem_cache_free(xnfp->xnf_tx_buf_cache, txp); 1298 1299 break; 1300 1301 case TX_MCAST_REQ: 1302 txp->tx_type = TX_MCAST_RSP; 1303 txp->tx_status = trp->status; 1304 cv_broadcast(&xnfp->xnf_cv_multicast); 1305 1306 break; 1307 1308 case TX_MCAST_RSP: 1309 break; 1310 1311 default: 1312 cmn_err(CE_PANIC, "xnf_tx_clean_ring: " 1313 "invalid xnf_txbuf_t type: %d", 1314 txp->tx_type); 1315 break; 1316 } 1317 } 1318 /* 1319 * Record the last response we dealt with so that we 1320 * know where to start next time around. 1321 */ 1322 xnfp->xnf_tx_ring.rsp_cons = prod; 1323 membar_enter(); 1324 } 1325 1326 /* LINTED: constant in conditional context */ 1327 RING_FINAL_CHECK_FOR_RESPONSES(&xnfp->xnf_tx_ring, work_to_do); 1328 if (work_to_do) 1329 goto loop; 1330 1331 return (RING_FREE_REQUESTS(&xnfp->xnf_tx_ring)); 1332 } 1333 1334 /* 1335 * Allocate and fill in a look-aside buffer for the packet `mp'. Used 1336 * to ensure that the packet is physically contiguous and contained 1337 * within a single page. 1338 */ 1339 static xnf_buf_t * 1340 xnf_tx_pullup(xnf_t *xnfp, mblk_t *mp) 1341 { 1342 xnf_buf_t *bd; 1343 caddr_t bp; 1344 1345 bd = xnf_buf_get(xnfp, KM_SLEEP, B_TRUE); 1346 if (bd == NULL) 1347 return (NULL); 1348 1349 bp = bd->buf; 1350 while (mp != NULL) { 1351 size_t len = MBLKL(mp); 1352 1353 bcopy(mp->b_rptr, bp, len); 1354 bp += len; 1355 1356 mp = mp->b_cont; 1357 } 1358 1359 ASSERT((bp - bd->buf) <= PAGESIZE); 1360 1361 xnfp->xnf_stat_tx_pullup++; 1362 1363 return (bd); 1364 } 1365 1366 /* 1367 * Insert the pseudo-header checksum into the packet `buf'. 1368 */ 1369 void 1370 xnf_pseudo_cksum(caddr_t buf, int length) 1371 { 1372 struct ether_header *ehp; 1373 uint16_t sap, len, *stuff; 1374 uint32_t cksum; 1375 size_t offset; 1376 ipha_t *ipha; 1377 ipaddr_t src, dst; 1378 1379 ASSERT(length >= sizeof (*ehp)); 1380 ehp = (struct ether_header *)buf; 1381 1382 if (ntohs(ehp->ether_type) == VLAN_TPID) { 1383 struct ether_vlan_header *evhp; 1384 1385 ASSERT(length >= sizeof (*evhp)); 1386 evhp = (struct ether_vlan_header *)buf; 1387 sap = ntohs(evhp->ether_type); 1388 offset = sizeof (*evhp); 1389 } else { 1390 sap = ntohs(ehp->ether_type); 1391 offset = sizeof (*ehp); 1392 } 1393 1394 ASSERT(sap == ETHERTYPE_IP); 1395 1396 /* Packet should have been pulled up by the caller. 
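 *
 * For reference, the value computed below is the pseudo-header
 * sum: source address + destination address + payload length,
 * folded to 16 bits, with the protocol contribution assumed to be
 * carried in the IP_TCP_CSUM_COMP / IP_UDP_CSUM_COMP seed.  The
 * backend completes the checksum over the payload when
 * NETTXF_csum_blank is set by xnf_send().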
*/ 1397 if ((offset + sizeof (ipha_t)) > length) { 1398 cmn_err(CE_WARN, "xnf_pseudo_cksum: no room for checksum"); 1399 return; 1400 } 1401 1402 ipha = (ipha_t *)(buf + offset); 1403 1404 ASSERT(IPH_HDR_LENGTH(ipha) == IP_SIMPLE_HDR_LENGTH); 1405 1406 len = ntohs(ipha->ipha_length) - IP_SIMPLE_HDR_LENGTH; 1407 1408 switch (ipha->ipha_protocol) { 1409 case IPPROTO_TCP: 1410 stuff = IPH_TCPH_CHECKSUMP(ipha, IP_SIMPLE_HDR_LENGTH); 1411 cksum = IP_TCP_CSUM_COMP; 1412 break; 1413 case IPPROTO_UDP: 1414 stuff = IPH_UDPH_CHECKSUMP(ipha, IP_SIMPLE_HDR_LENGTH); 1415 cksum = IP_UDP_CSUM_COMP; 1416 break; 1417 default: 1418 cmn_err(CE_WARN, "xnf_pseudo_cksum: unexpected protocol %d", 1419 ipha->ipha_protocol); 1420 return; 1421 } 1422 1423 src = ipha->ipha_src; 1424 dst = ipha->ipha_dst; 1425 1426 cksum += (dst >> 16) + (dst & 0xFFFF); 1427 cksum += (src >> 16) + (src & 0xFFFF); 1428 cksum += htons(len); 1429 1430 cksum = (cksum >> 16) + (cksum & 0xFFFF); 1431 cksum = (cksum >> 16) + (cksum & 0xFFFF); 1432 1433 ASSERT(cksum <= 0xFFFF); 1434 1435 *stuff = (uint16_t)(cksum ? cksum : ~cksum); 1436 } 1437 1438 /* 1439 * Push a list of prepared packets (`txp') into the transmit ring. 1440 */ 1441 static xnf_txbuf_t * 1442 tx_push_packets(xnf_t *xnfp, xnf_txbuf_t *txp) 1443 { 1444 int slots_free; 1445 RING_IDX slot; 1446 boolean_t notify; 1447 1448 mutex_enter(&xnfp->xnf_txlock); 1449 1450 ASSERT(xnfp->xnf_running); 1451 1452 /* 1453 * Wait until we are connected to the backend. 1454 */ 1455 while (!xnfp->xnf_connected) 1456 cv_wait(&xnfp->xnf_cv_state, &xnfp->xnf_txlock); 1457 1458 slots_free = tx_slots_get(xnfp, 1, B_FALSE); 1459 DTRACE_PROBE1(xnf_send_slotsfree, int, slots_free); 1460 1461 slot = xnfp->xnf_tx_ring.req_prod_pvt; 1462 1463 while ((txp != NULL) && (slots_free > 0)) { 1464 xnf_txid_t *tidp; 1465 netif_tx_request_t *txrp; 1466 1467 tidp = txid_get(xnfp); 1468 VERIFY(tidp != NULL); 1469 1470 txrp = RING_GET_REQUEST(&xnfp->xnf_tx_ring, slot); 1471 1472 txp->tx_slot = slot; 1473 txp->tx_txreq.id = tidp->id; 1474 *txrp = txp->tx_txreq; 1475 1476 tidp->txbuf = txp; 1477 1478 xnfp->xnf_stat_opackets++; 1479 xnfp->xnf_stat_obytes += txp->tx_txreq.size; 1480 1481 txp = txp->tx_next; 1482 slots_free--; 1483 slot++; 1484 1485 } 1486 1487 xnfp->xnf_tx_ring.req_prod_pvt = slot; 1488 1489 /* 1490 * Tell the peer that we sent something, if it cares. 1491 */ 1492 /* LINTED: constant in conditional context */ 1493 RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&xnfp->xnf_tx_ring, 1494 notify); 1495 if (notify) 1496 ec_notify_via_evtchn(xnfp->xnf_evtchn); 1497 1498 mutex_exit(&xnfp->xnf_txlock); 1499 1500 return (txp); 1501 } 1502 1503 /* 1504 * Send the chain of packets `mp'. Called by the MAC framework. 1505 */ 1506 static mblk_t * 1507 xnf_send(void *arg, mblk_t *mp) 1508 { 1509 xnf_t *xnfp = arg; 1510 domid_t oeid; 1511 xnf_txbuf_t *head, *tail; 1512 mblk_t *ml; 1513 int prepared; 1514 1515 oeid = xvdi_get_oeid(xnfp->xnf_devinfo); 1516 1517 /* 1518 * Prepare packets for transmission. 1519 */ 1520 head = tail = NULL; 1521 prepared = 0; 1522 while (mp != NULL) { 1523 xnf_txbuf_t *txp; 1524 int n_chunks, length; 1525 boolean_t page_oops; 1526 uint32_t pflags; 1527 1528 for (ml = mp, n_chunks = length = 0, page_oops = B_FALSE; 1529 ml != NULL; 1530 ml = ml->b_cont, n_chunks++) { 1531 1532 /* 1533 * Test if this buffer includes a page 1534 * boundary. The test assumes that the range 1535 * b_rptr...b_wptr can include only a single 1536 * boundary. 
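 *
 * For example (assuming 4k pages): a fragment with
 * b_rptr == ...0xff8 and b_wptr == ...0x1004 straddles a page
 * boundary, so xnf_btop() of the two addresses differs, page_oops
 * is set and the whole packet is later pulled up into a
 * single-page look-aside buffer by xnf_tx_pullup().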
1537 */ 1538 if (xnf_btop((size_t)ml->b_rptr) != 1539 xnf_btop((size_t)ml->b_wptr)) { 1540 xnfp->xnf_stat_tx_pagebndry++; 1541 page_oops = B_TRUE; 1542 } 1543 1544 length += MBLKL(ml); 1545 } 1546 DTRACE_PROBE1(xnf_send_b_cont, int, n_chunks); 1547 1548 /* 1549 * Make sure packet isn't too large. 1550 */ 1551 if (length > XNF_FRAMESIZE) { 1552 cmn_err(CE_WARN, 1553 "xnf%d: oversized packet (%d bytes) dropped", 1554 ddi_get_instance(xnfp->xnf_devinfo), length); 1555 freemsg(mp); 1556 continue; 1557 } 1558 1559 txp = kmem_cache_alloc(xnfp->xnf_tx_buf_cache, KM_SLEEP); 1560 if (txp == NULL) 1561 break; 1562 1563 txp->tx_type = TX_DATA; 1564 1565 if ((n_chunks > xnf_max_tx_frags) || page_oops) { 1566 /* 1567 * Loan a side buffer rather than the mblk 1568 * itself. 1569 */ 1570 txp->tx_bdesc = xnf_tx_pullup(xnfp, mp); 1571 if (txp->tx_bdesc == NULL) { 1572 kmem_cache_free(xnfp->xnf_tx_buf_cache, txp); 1573 break; 1574 } 1575 1576 txp->tx_bufp = txp->tx_bdesc->buf; 1577 txp->tx_mfn = txp->tx_bdesc->buf_mfn; 1578 txp->tx_txreq.gref = txp->tx_bdesc->grant_ref; 1579 1580 } else { 1581 int rc; 1582 ddi_dma_cookie_t dma_cookie; 1583 uint_t ncookies; 1584 1585 rc = ddi_dma_addr_bind_handle(txp->tx_dma_handle, 1586 NULL, (char *)mp->b_rptr, length, 1587 DDI_DMA_WRITE | DDI_DMA_STREAMING, 1588 DDI_DMA_DONTWAIT, 0, &dma_cookie, 1589 &ncookies); 1590 if (rc != DDI_DMA_MAPPED) { 1591 ASSERT(rc != DDI_DMA_INUSE); 1592 ASSERT(rc != DDI_DMA_PARTIAL_MAP); 1593 1594 #ifdef XNF_DEBUG 1595 if (rc != DDI_DMA_NORESOURCES) 1596 cmn_err(CE_WARN, 1597 "xnf%d: bind_handle failed (%x)", 1598 ddi_get_instance(xnfp->xnf_devinfo), 1599 rc); 1600 #endif 1601 kmem_cache_free(xnfp->xnf_tx_buf_cache, txp); 1602 break; 1603 } 1604 ASSERT(ncookies == 1); 1605 1606 txp->tx_bdesc = NULL; 1607 txp->tx_bufp = (caddr_t)mp->b_rptr; 1608 txp->tx_mfn = 1609 xnf_btop(pa_to_ma(dma_cookie.dmac_laddress)); 1610 txp->tx_txreq.gref = gref_get(xnfp); 1611 if (txp->tx_txreq.gref == INVALID_GRANT_REF) { 1612 (void) ddi_dma_unbind_handle( 1613 txp->tx_dma_handle); 1614 kmem_cache_free(xnfp->xnf_tx_buf_cache, txp); 1615 break; 1616 } 1617 gnttab_grant_foreign_access_ref(txp->tx_txreq.gref, 1618 oeid, txp->tx_mfn, 1); 1619 } 1620 1621 txp->tx_next = NULL; 1622 txp->tx_mp = mp; 1623 txp->tx_txreq.size = length; 1624 txp->tx_txreq.offset = (uintptr_t)txp->tx_bufp & PAGEOFFSET; 1625 txp->tx_txreq.flags = 0; 1626 hcksum_retrieve(mp, NULL, NULL, NULL, NULL, NULL, NULL, 1627 &pflags); 1628 if (pflags != 0) { 1629 /* 1630 * If the local protocol stack requests checksum 1631 * offload we set the 'checksum blank' flag, 1632 * indicating to the peer that we need the checksum 1633 * calculated for us. 1634 * 1635 * We _don't_ set the validated flag, because we haven't 1636 * validated that the data and the checksum match. 1637 */ 1638 xnf_pseudo_cksum(txp->tx_bufp, length); 1639 txp->tx_txreq.flags |= NETTXF_csum_blank; 1640 1641 xnfp->xnf_stat_tx_cksum_deferred++; 1642 } 1643 1644 if (head == NULL) { 1645 ASSERT(tail == NULL); 1646 1647 head = txp; 1648 } else { 1649 ASSERT(tail != NULL); 1650 1651 tail->tx_next = txp; 1652 } 1653 tail = txp; 1654 1655 mp = mp->b_next; 1656 prepared++; 1657 1658 /* 1659 * There is no point in preparing more than 1660 * NET_TX_RING_SIZE, as we won't be able to push them 1661 * into the ring in one go and would hence have to 1662 * un-prepare the extra. 
1663 */ 1664 if (prepared == NET_TX_RING_SIZE) 1665 break; 1666 } 1667 1668 DTRACE_PROBE1(xnf_send_prepared, int, prepared); 1669 1670 if (mp != NULL) { 1671 #ifdef XNF_DEBUG 1672 int notprepared = 0; 1673 mblk_t *l = mp; 1674 1675 while (l != NULL) { 1676 notprepared++; 1677 l = l->b_next; 1678 } 1679 1680 DTRACE_PROBE1(xnf_send_notprepared, int, notprepared); 1681 #else /* !XNF_DEBUG */ 1682 DTRACE_PROBE1(xnf_send_notprepared, int, -1); 1683 #endif /* XNF_DEBUG */ 1684 } 1685 1686 /* 1687 * Push the packets we have prepared into the ring. They may 1688 * not all go. 1689 */ 1690 if (head != NULL) 1691 head = tx_push_packets(xnfp, head); 1692 1693 /* 1694 * If some packets that we prepared were not sent, unprepare 1695 * them and add them back to the head of those we didn't 1696 * prepare. 1697 */ 1698 { 1699 xnf_txbuf_t *loop; 1700 mblk_t *mp_head, *mp_tail; 1701 int unprepared = 0; 1702 1703 mp_head = mp_tail = NULL; 1704 loop = head; 1705 1706 while (loop != NULL) { 1707 xnf_txbuf_t *next = loop->tx_next; 1708 1709 if (loop->tx_bdesc == NULL) { 1710 (void) gnttab_end_foreign_access_ref( 1711 loop->tx_txreq.gref, 1); 1712 gref_put(xnfp, loop->tx_txreq.gref); 1713 (void) ddi_dma_unbind_handle( 1714 loop->tx_dma_handle); 1715 } else { 1716 xnf_buf_put(xnfp, loop->tx_bdesc, B_TRUE); 1717 } 1718 1719 ASSERT(loop->tx_mp != NULL); 1720 if (mp_head == NULL) 1721 mp_head = loop->tx_mp; 1722 mp_tail = loop->tx_mp; 1723 1724 kmem_cache_free(xnfp->xnf_tx_buf_cache, loop); 1725 loop = next; 1726 unprepared++; 1727 } 1728 1729 if (mp_tail == NULL) { 1730 ASSERT(mp_head == NULL); 1731 } else { 1732 ASSERT(mp_head != NULL); 1733 1734 mp_tail->b_next = mp; 1735 mp = mp_head; 1736 } 1737 1738 DTRACE_PROBE1(xnf_send_unprepared, int, unprepared); 1739 } 1740 1741 /* 1742 * If any mblks are left then we have deferred for some reason 1743 * and need to ask for a re-schedule later. This is typically 1744 * due to the ring filling. 1745 */ 1746 if (mp != NULL) { 1747 mutex_enter(&xnfp->xnf_schedlock); 1748 xnfp->xnf_need_sched = B_TRUE; 1749 mutex_exit(&xnfp->xnf_schedlock); 1750 1751 xnfp->xnf_stat_tx_defer++; 1752 } 1753 1754 return (mp); 1755 } 1756 1757 /* 1758 * Notification of RX packets. Currently no TX-complete interrupt is 1759 * used, as we clean the TX ring lazily. 1760 */ 1761 static uint_t 1762 xnf_intr(caddr_t arg) 1763 { 1764 xnf_t *xnfp = (xnf_t *)arg; 1765 mblk_t *mp; 1766 boolean_t need_sched, clean_ring; 1767 1768 mutex_enter(&xnfp->xnf_rxlock); 1769 1770 /* 1771 * Interrupts before we are connected are spurious. 1772 */ 1773 if (!xnfp->xnf_connected) { 1774 mutex_exit(&xnfp->xnf_rxlock); 1775 xnfp->xnf_stat_unclaimed_interrupts++; 1776 return (DDI_INTR_UNCLAIMED); 1777 } 1778 1779 /* 1780 * Receive side processing. 1781 */ 1782 do { 1783 /* 1784 * Collect buffers from the ring. 1785 */ 1786 xnf_rx_collect(xnfp); 1787 1788 /* 1789 * Interrupt me when the next receive buffer is consumed. 1790 */ 1791 xnfp->xnf_rx_ring.sring->rsp_event = 1792 xnfp->xnf_rx_ring.rsp_cons + 1; 1793 xen_mb(); 1794 1795 } while (RING_HAS_UNCONSUMED_RESPONSES(&xnfp->xnf_rx_ring)); 1796 1797 if (xnfp->xnf_rx_new_buffers_posted) { 1798 boolean_t notify; 1799 1800 /* 1801 * Indicate to the peer that we have re-filled the 1802 * receive ring, if it cares. 
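 *
 * (RING_PUSH_REQUESTS_AND_CHECK_NOTIFY() is expected to set
 * `notify' only when the peer has indicated, via its event index,
 * that it wants to be woken, so most refills avoid an event
 * channel kick.)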
1803 */ 1804 /* LINTED: constant in conditional context */ 1805 RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&xnfp->xnf_rx_ring, notify); 1806 if (notify) 1807 ec_notify_via_evtchn(xnfp->xnf_evtchn); 1808 xnfp->xnf_rx_new_buffers_posted = B_FALSE; 1809 } 1810 1811 mp = xnfp->xnf_rx_head; 1812 xnfp->xnf_rx_head = xnfp->xnf_rx_tail = NULL; 1813 1814 xnfp->xnf_stat_interrupts++; 1815 mutex_exit(&xnfp->xnf_rxlock); 1816 1817 if (mp != NULL) 1818 mac_rx(xnfp->xnf_mh, NULL, mp); 1819 1820 /* 1821 * Transmit side processing. 1822 * 1823 * If a previous transmit attempt failed or we have pending 1824 * multicast requests, clean the ring. 1825 * 1826 * If we previously stalled transmission and cleaning produces 1827 * some free slots, tell upstream to attempt sending again. 1828 * 1829 * The odd style is to avoid acquiring xnf_txlock unless we 1830 * will actually look inside the tx machinery. 1831 */ 1832 mutex_enter(&xnfp->xnf_schedlock); 1833 need_sched = xnfp->xnf_need_sched; 1834 clean_ring = need_sched || (xnfp->xnf_pending_multicast > 0); 1835 mutex_exit(&xnfp->xnf_schedlock); 1836 1837 if (clean_ring) { 1838 int free_slots; 1839 1840 mutex_enter(&xnfp->xnf_txlock); 1841 free_slots = tx_slots_get(xnfp, 0, B_FALSE); 1842 1843 if (need_sched && (free_slots > 0)) { 1844 mutex_enter(&xnfp->xnf_schedlock); 1845 xnfp->xnf_need_sched = B_FALSE; 1846 mutex_exit(&xnfp->xnf_schedlock); 1847 1848 mac_tx_update(xnfp->xnf_mh); 1849 } 1850 mutex_exit(&xnfp->xnf_txlock); 1851 } 1852 1853 return (DDI_INTR_CLAIMED); 1854 } 1855 1856 /* 1857 * xnf_start() -- start the board receiving and enable interrupts. 1858 */ 1859 static int 1860 xnf_start(void *arg) 1861 { 1862 xnf_t *xnfp = arg; 1863 1864 #ifdef XNF_DEBUG 1865 if (xnf_debug & XNF_DEBUG_TRACE) 1866 printf("xnf%d start(0x%p)\n", 1867 ddi_get_instance(xnfp->xnf_devinfo), (void *)xnfp); 1868 #endif 1869 1870 mutex_enter(&xnfp->xnf_rxlock); 1871 mutex_enter(&xnfp->xnf_txlock); 1872 1873 /* Accept packets from above. */ 1874 xnfp->xnf_running = B_TRUE; 1875 1876 mutex_exit(&xnfp->xnf_txlock); 1877 mutex_exit(&xnfp->xnf_rxlock); 1878 1879 return (0); 1880 } 1881 1882 /* xnf_stop() - disable hardware */ 1883 static void 1884 xnf_stop(void *arg) 1885 { 1886 xnf_t *xnfp = arg; 1887 1888 #ifdef XNF_DEBUG 1889 if (xnf_debug & XNF_DEBUG_TRACE) 1890 printf("xnf%d stop(0x%p)\n", 1891 ddi_get_instance(xnfp->xnf_devinfo), (void *)xnfp); 1892 #endif 1893 1894 mutex_enter(&xnfp->xnf_rxlock); 1895 mutex_enter(&xnfp->xnf_txlock); 1896 1897 xnfp->xnf_running = B_FALSE; 1898 1899 mutex_exit(&xnfp->xnf_txlock); 1900 mutex_exit(&xnfp->xnf_rxlock); 1901 } 1902 1903 /* 1904 * Hang buffer `bdesc' on the RX ring. 1905 */ 1906 static void 1907 xnf_rxbuf_hang(xnf_t *xnfp, xnf_buf_t *bdesc) 1908 { 1909 netif_rx_request_t *reqp; 1910 RING_IDX hang_ix; 1911 1912 ASSERT(MUTEX_HELD(&xnfp->xnf_rxlock)); 1913 1914 reqp = RING_GET_REQUEST(&xnfp->xnf_rx_ring, 1915 xnfp->xnf_rx_ring.req_prod_pvt); 1916 hang_ix = (RING_IDX) (reqp - RING_GET_REQUEST(&xnfp->xnf_rx_ring, 0)); 1917 ASSERT(xnfp->xnf_rx_pkt_info[hang_ix] == NULL); 1918 1919 reqp->id = bdesc->id = hang_ix; 1920 reqp->gref = bdesc->grant_ref; 1921 1922 xnfp->xnf_rx_pkt_info[hang_ix] = bdesc; 1923 xnfp->xnf_rx_ring.req_prod_pvt++; 1924 1925 xnfp->xnf_rx_new_buffers_posted = B_TRUE; 1926 } 1927 1928 /* 1929 * Collect packets from the RX ring, storing them in `xnfp' for later 1930 * use. 
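 *
 * In outline (a sketch of the loop below), for each response we
 * either
 *
 *	- copy the data into a fresh allocb()ed mblk (small packets,
 *	  see xnf_rx_copy_limit), or
 *	- wrap the received buffer with desballoc() and post a
 *	  replacement buffer to the ring,
 *
 * then re-hang a buffer via xnf_rxbuf_hang() and append the mblk
 * to the xnf_rx_head/xnf_rx_tail chain for xnf_intr() to hand to
 * mac_rx().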
1931 */ 1932 static void 1933 xnf_rx_collect(xnf_t *xnfp) 1934 { 1935 mblk_t *head, *tail; 1936 1937 ASSERT(MUTEX_HELD(&xnfp->xnf_rxlock)); 1938 1939 /* 1940 * Loop over unconsumed responses: 1941 * 1. get a response 1942 * 2. take corresponding buffer off recv. ring 1943 * 3. indicate this by setting slot to NULL 1944 * 4. create a new message and 1945 * 5. copy data in, adjust ptr 1946 */ 1947 1948 head = tail = NULL; 1949 1950 while (RING_HAS_UNCONSUMED_RESPONSES(&xnfp->xnf_rx_ring)) { 1951 netif_rx_response_t *rxpkt; 1952 xnf_buf_t *bdesc; 1953 ssize_t len; 1954 size_t off; 1955 mblk_t *mp = NULL; 1956 boolean_t hwcsum = B_FALSE; 1957 grant_ref_t ref; 1958 1959 /* 1. */ 1960 rxpkt = RING_GET_RESPONSE(&xnfp->xnf_rx_ring, 1961 xnfp->xnf_rx_ring.rsp_cons); 1962 1963 DTRACE_PROBE4(xnf_rx_got_rsp, int, (int)rxpkt->id, 1964 int, (int)rxpkt->offset, 1965 int, (int)rxpkt->flags, 1966 int, (int)rxpkt->status); 1967 1968 /* 1969 * 2. 1970 */ 1971 bdesc = xnfp->xnf_rx_pkt_info[rxpkt->id]; 1972 1973 /* 1974 * 3. 1975 */ 1976 xnfp->xnf_rx_pkt_info[rxpkt->id] = NULL; 1977 ASSERT(bdesc->id == rxpkt->id); 1978 1979 ref = bdesc->grant_ref; 1980 off = rxpkt->offset; 1981 len = rxpkt->status; 1982 1983 if (!xnfp->xnf_running) { 1984 DTRACE_PROBE4(xnf_rx_not_running, 1985 int, rxpkt->status, 1986 char *, bdesc->buf, int, rxpkt->offset, 1987 char *, ((char *)bdesc->buf) + rxpkt->offset); 1988 1989 xnfp->xnf_stat_drop++; 1990 1991 } else if (len <= 0) { 1992 DTRACE_PROBE4(xnf_rx_pkt_status_negative, 1993 int, rxpkt->status, 1994 char *, bdesc->buf, int, rxpkt->offset, 1995 char *, ((char *)bdesc->buf) + rxpkt->offset); 1996 1997 xnfp->xnf_stat_errrx++; 1998 1999 switch (len) { 2000 case 0: 2001 xnfp->xnf_stat_runt++; 2002 break; 2003 case NETIF_RSP_ERROR: 2004 xnfp->xnf_stat_mac_rcv_error++; 2005 break; 2006 case NETIF_RSP_DROPPED: 2007 xnfp->xnf_stat_norxbuf++; 2008 break; 2009 } 2010 2011 } else if (bdesc->grant_ref == INVALID_GRANT_REF) { 2012 cmn_err(CE_WARN, "Bad rx grant reference %d " 2013 "from domain %d", ref, 2014 xvdi_get_oeid(xnfp->xnf_devinfo)); 2015 2016 } else if ((off + len) > PAGESIZE) { 2017 cmn_err(CE_WARN, "Rx packet overflows page " 2018 "(offset %ld, length %ld) from domain %d", 2019 off, len, xvdi_get_oeid(xnfp->xnf_devinfo)); 2020 } else { 2021 xnf_buf_t *nbuf = NULL; 2022 2023 DTRACE_PROBE4(xnf_rx_packet, int, len, 2024 char *, bdesc->buf, int, off, 2025 char *, ((char *)bdesc->buf) + off); 2026 2027 ASSERT(off + len <= PAGEOFFSET); 2028 2029 if (rxpkt->flags & NETRXF_data_validated) 2030 hwcsum = B_TRUE; 2031 2032 /* 2033 * If the packet is below a pre-determined 2034 * size we will copy data out rather than 2035 * replace it. 2036 */ 2037 if (len > xnf_rx_copy_limit) 2038 nbuf = xnf_buf_get(xnfp, KM_NOSLEEP, B_FALSE); 2039 2040 /* 2041 * If we have a replacement buffer, attempt to 2042 * wrap the existing one with an mblk_t in 2043 * order that the upper layers of the stack 2044 * might use it directly. 2045 */ 2046 if (nbuf != NULL) { 2047 mp = desballoc((unsigned char *)bdesc->buf, 2048 bdesc->len, 0, &bdesc->free_rtn); 2049 if (mp == NULL) { 2050 xnfp->xnf_stat_rx_desballoc_fail++; 2051 xnfp->xnf_stat_norxbuf++; 2052 2053 xnf_buf_put(xnfp, nbuf, B_FALSE); 2054 nbuf = NULL; 2055 } else { 2056 mp->b_rptr = mp->b_rptr + off; 2057 mp->b_wptr = mp->b_rptr + len; 2058 2059 /* 2060 * Release the grant reference 2061 * associated with this buffer 2062 * - they are scarce and the 2063 * upper layers of the stack 2064 * don't need it. 
2065 */ 2066 (void) gnttab_end_foreign_access_ref( 2067 bdesc->grant_ref, 0); 2068 gref_put(xnfp, bdesc->grant_ref); 2069 bdesc->grant_ref = INVALID_GRANT_REF; 2070 2071 bdesc = nbuf; 2072 } 2073 } 2074 2075 if (nbuf == NULL) { 2076 /* 2077 * No replacement buffer allocated - 2078 * attempt to copy the data out and 2079 * re-hang the existing buffer. 2080 */ 2081 2082 /* 4. */ 2083 mp = allocb(len, BPRI_MED); 2084 if (mp == NULL) { 2085 xnfp->xnf_stat_rx_allocb_fail++; 2086 xnfp->xnf_stat_norxbuf++; 2087 } else { 2088 /* 5. */ 2089 bcopy(bdesc->buf + off, mp->b_wptr, 2090 len); 2091 mp->b_wptr += len; 2092 } 2093 } 2094 } 2095 2096 /* Re-hang the buffer. */ 2097 xnf_rxbuf_hang(xnfp, bdesc); 2098 2099 if (mp != NULL) { 2100 if (hwcsum) { 2101 /* 2102 * If the peer says that the data has 2103 * been validated then we declare that 2104 * the full checksum has been 2105 * verified. 2106 * 2107 * We don't look at the "checksum 2108 * blank" flag, and hence could have a 2109 * packet here that we are asserting 2110 * is good with a blank checksum. 2111 * 2112 * The hardware checksum offload 2113 * specification says that we must 2114 * provide the actual checksum as well 2115 * as an assertion that it is valid, 2116 * but the protocol stack doesn't 2117 * actually use it and some other 2118 * drivers don't bother, so we don't. 2119 * If it was necessary we could grovel 2120 * in the packet to find it. 2121 */ 2122 (void) hcksum_assoc(mp, NULL, 2123 NULL, 0, 0, 0, 0, 2124 HCK_FULLCKSUM | 2125 HCK_FULLCKSUM_OK, 0); 2126 xnfp->xnf_stat_rx_cksum_no_need++; 2127 } 2128 if (head == NULL) { 2129 ASSERT(tail == NULL); 2130 2131 head = mp; 2132 } else { 2133 ASSERT(tail != NULL); 2134 2135 tail->b_next = mp; 2136 } 2137 tail = mp; 2138 2139 ASSERT(mp->b_next == NULL); 2140 2141 xnfp->xnf_stat_ipackets++; 2142 xnfp->xnf_stat_rbytes += len; 2143 } 2144 2145 xnfp->xnf_rx_ring.rsp_cons++; 2146 } 2147 2148 /* 2149 * Store the mblks we have collected. 2150 */ 2151 if (head != NULL) { 2152 ASSERT(tail != NULL); 2153 2154 if (xnfp->xnf_rx_head == NULL) { 2155 ASSERT(xnfp->xnf_rx_tail == NULL); 2156 2157 xnfp->xnf_rx_head = head; 2158 } else { 2159 ASSERT(xnfp->xnf_rx_tail != NULL); 2160 2161 xnfp->xnf_rx_tail->b_next = head; 2162 } 2163 xnfp->xnf_rx_tail = tail; 2164 } 2165 } 2166 2167 /* 2168 * xnf_alloc_dma_resources() -- initialize the drivers structures 2169 */ 2170 static int 2171 xnf_alloc_dma_resources(xnf_t *xnfp) 2172 { 2173 dev_info_t *devinfo = xnfp->xnf_devinfo; 2174 size_t len; 2175 ddi_dma_cookie_t dma_cookie; 2176 uint_t ncookies; 2177 int rc; 2178 caddr_t rptr; 2179 2180 /* 2181 * The code below allocates all the DMA data structures that 2182 * need to be released when the driver is detached. 2183 * 2184 * Allocate page for the transmit descriptor ring. 
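 *
 * Each ring page goes through the usual three-step DDI sequence:
 * ddi_dma_alloc_handle(), ddi_dma_mem_alloc() and
 * ddi_dma_addr_bind_handle(); xnf_release_dma_resources() undoes
 * these in the reverse order (unbind, free the memory, free the
 * handle).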
2185 */ 2186 if (ddi_dma_alloc_handle(devinfo, &ringbuf_dma_attr, 2187 DDI_DMA_SLEEP, 0, &xnfp->xnf_tx_ring_dma_handle) != DDI_SUCCESS) 2188 goto alloc_error; 2189 2190 if (ddi_dma_mem_alloc(xnfp->xnf_tx_ring_dma_handle, 2191 PAGESIZE, &accattr, DDI_DMA_CONSISTENT, 2192 DDI_DMA_SLEEP, 0, &rptr, &len, 2193 &xnfp->xnf_tx_ring_dma_acchandle) != DDI_SUCCESS) { 2194 ddi_dma_free_handle(&xnfp->xnf_tx_ring_dma_handle); 2195 xnfp->xnf_tx_ring_dma_handle = NULL; 2196 goto alloc_error; 2197 } 2198 2199 if ((rc = ddi_dma_addr_bind_handle(xnfp->xnf_tx_ring_dma_handle, NULL, 2200 rptr, PAGESIZE, DDI_DMA_RDWR | DDI_DMA_CONSISTENT, 2201 DDI_DMA_SLEEP, 0, &dma_cookie, &ncookies)) != DDI_DMA_MAPPED) { 2202 ddi_dma_mem_free(&xnfp->xnf_tx_ring_dma_acchandle); 2203 ddi_dma_free_handle(&xnfp->xnf_tx_ring_dma_handle); 2204 xnfp->xnf_tx_ring_dma_handle = NULL; 2205 xnfp->xnf_tx_ring_dma_acchandle = NULL; 2206 if (rc == DDI_DMA_NORESOURCES) 2207 goto alloc_error; 2208 else 2209 goto error; 2210 } 2211 2212 ASSERT(ncookies == 1); 2213 bzero(rptr, PAGESIZE); 2214 /* LINTED: constant in conditional context */ 2215 SHARED_RING_INIT((netif_tx_sring_t *)rptr); 2216 /* LINTED: constant in conditional context */ 2217 FRONT_RING_INIT(&xnfp->xnf_tx_ring, (netif_tx_sring_t *)rptr, PAGESIZE); 2218 xnfp->xnf_tx_ring_phys_addr = dma_cookie.dmac_laddress; 2219 2220 /* 2221 * Allocate page for the receive descriptor ring. 2222 */ 2223 if (ddi_dma_alloc_handle(devinfo, &ringbuf_dma_attr, 2224 DDI_DMA_SLEEP, 0, &xnfp->xnf_rx_ring_dma_handle) != DDI_SUCCESS) 2225 goto alloc_error; 2226 2227 if (ddi_dma_mem_alloc(xnfp->xnf_rx_ring_dma_handle, 2228 PAGESIZE, &accattr, DDI_DMA_CONSISTENT, 2229 DDI_DMA_SLEEP, 0, &rptr, &len, 2230 &xnfp->xnf_rx_ring_dma_acchandle) != DDI_SUCCESS) { 2231 ddi_dma_free_handle(&xnfp->xnf_rx_ring_dma_handle); 2232 xnfp->xnf_rx_ring_dma_handle = NULL; 2233 goto alloc_error; 2234 } 2235 2236 if ((rc = ddi_dma_addr_bind_handle(xnfp->xnf_rx_ring_dma_handle, NULL, 2237 rptr, PAGESIZE, DDI_DMA_RDWR | DDI_DMA_CONSISTENT, 2238 DDI_DMA_SLEEP, 0, &dma_cookie, &ncookies)) != DDI_DMA_MAPPED) { 2239 ddi_dma_mem_free(&xnfp->xnf_rx_ring_dma_acchandle); 2240 ddi_dma_free_handle(&xnfp->xnf_rx_ring_dma_handle); 2241 xnfp->xnf_rx_ring_dma_handle = NULL; 2242 xnfp->xnf_rx_ring_dma_acchandle = NULL; 2243 if (rc == DDI_DMA_NORESOURCES) 2244 goto alloc_error; 2245 else 2246 goto error; 2247 } 2248 2249 ASSERT(ncookies == 1); 2250 bzero(rptr, PAGESIZE); 2251 /* LINTED: constant in conditional context */ 2252 SHARED_RING_INIT((netif_rx_sring_t *)rptr); 2253 /* LINTED: constant in conditional context */ 2254 FRONT_RING_INIT(&xnfp->xnf_rx_ring, (netif_rx_sring_t *)rptr, PAGESIZE); 2255 xnfp->xnf_rx_ring_phys_addr = dma_cookie.dmac_laddress; 2256 2257 return (DDI_SUCCESS); 2258 2259 alloc_error: 2260 cmn_err(CE_WARN, "xnf%d: could not allocate enough DMA memory", 2261 ddi_get_instance(xnfp->xnf_devinfo)); 2262 error: 2263 xnf_release_dma_resources(xnfp); 2264 return (DDI_FAILURE); 2265 } 2266 2267 /* 2268 * Release all DMA resources in the opposite order from acquisition 2269 */ 2270 static void 2271 xnf_release_dma_resources(xnf_t *xnfp) 2272 { 2273 int i; 2274 2275 /* 2276 * Free receive buffers which are currently associated with 2277 * descriptors. 
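 *
 * Buffers currently loaned upstream are not present in
 * xnf_rx_pkt_info[] (xnf_rx_collect() set their slots to NULL when
 * handing them to the stack) and so are not freed here; they are
 * returned via xnf_buf_recycle() once the upstream consumer frees
 * them.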
2278 */ 2279 mutex_enter(&xnfp->xnf_rxlock); 2280 for (i = 0; i < NET_RX_RING_SIZE; i++) { 2281 xnf_buf_t *bp; 2282 2283 if ((bp = xnfp->xnf_rx_pkt_info[i]) == NULL) 2284 continue; 2285 xnfp->xnf_rx_pkt_info[i] = NULL; 2286 xnf_buf_put(xnfp, bp, B_FALSE); 2287 } 2288 mutex_exit(&xnfp->xnf_rxlock); 2289 2290 /* Free the receive ring buffer. */ 2291 if (xnfp->xnf_rx_ring_dma_acchandle != NULL) { 2292 (void) ddi_dma_unbind_handle(xnfp->xnf_rx_ring_dma_handle); 2293 ddi_dma_mem_free(&xnfp->xnf_rx_ring_dma_acchandle); 2294 ddi_dma_free_handle(&xnfp->xnf_rx_ring_dma_handle); 2295 xnfp->xnf_rx_ring_dma_acchandle = NULL; 2296 } 2297 /* Free the transmit ring buffer. */ 2298 if (xnfp->xnf_tx_ring_dma_acchandle != NULL) { 2299 (void) ddi_dma_unbind_handle(xnfp->xnf_tx_ring_dma_handle); 2300 ddi_dma_mem_free(&xnfp->xnf_tx_ring_dma_acchandle); 2301 ddi_dma_free_handle(&xnfp->xnf_tx_ring_dma_handle); 2302 xnfp->xnf_tx_ring_dma_acchandle = NULL; 2303 } 2304 2305 } 2306 2307 /* 2308 * Release any packets and associated structures used by the TX ring. 2309 */ 2310 static void 2311 xnf_release_mblks(xnf_t *xnfp) 2312 { 2313 RING_IDX i; 2314 xnf_txid_t *tidp; 2315 2316 for (i = 0, tidp = &xnfp->xnf_tx_pkt_id[0]; 2317 i < NET_TX_RING_SIZE; 2318 i++, tidp++) { 2319 xnf_txbuf_t *txp = tidp->txbuf; 2320 2321 if (txp != NULL) { 2322 ASSERT(txp->tx_mp != NULL); 2323 freemsg(txp->tx_mp); 2324 2325 txid_put(xnfp, tidp); 2326 kmem_cache_free(xnfp->xnf_tx_buf_cache, txp); 2327 } 2328 } 2329 } 2330 2331 static int 2332 xnf_buf_constructor(void *buf, void *arg, int kmflag) 2333 { 2334 int (*ddiflags)(caddr_t) = DDI_DMA_SLEEP; 2335 xnf_buf_t *bdesc = buf; 2336 xnf_t *xnfp = arg; 2337 ddi_dma_cookie_t dma_cookie; 2338 uint_t ncookies; 2339 size_t len; 2340 2341 if (kmflag & KM_NOSLEEP) 2342 ddiflags = DDI_DMA_DONTWAIT; 2343 2344 /* Allocate a DMA access handle for the buffer. */ 2345 if (ddi_dma_alloc_handle(xnfp->xnf_devinfo, &buf_dma_attr, 2346 ddiflags, 0, &bdesc->dma_handle) != DDI_SUCCESS) 2347 goto failure; 2348 2349 /* Allocate DMA-able memory for buffer. */ 2350 if (ddi_dma_mem_alloc(bdesc->dma_handle, 2351 PAGESIZE, &data_accattr, DDI_DMA_STREAMING, ddiflags, 0, 2352 &bdesc->buf, &len, &bdesc->acc_handle) != DDI_SUCCESS) 2353 goto failure_1; 2354 2355 /* Bind to virtual address of buffer to get physical address. 
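 *
 * The single cookie produced by the bind supplies both the physical
 * address (buf_phys) and, via pfn_to_mfn(), the machine frame number
 * (buf_mfn) that is granted to the backend in xnf_buf_get().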
*/ 2356 if (ddi_dma_addr_bind_handle(bdesc->dma_handle, NULL, 2357 bdesc->buf, len, DDI_DMA_RDWR | DDI_DMA_STREAMING, 2358 ddiflags, 0, &dma_cookie, &ncookies) != DDI_DMA_MAPPED) 2359 goto failure_2; 2360 ASSERT(ncookies == 1); 2361 2362 bdesc->free_rtn.free_func = xnf_buf_recycle; 2363 bdesc->free_rtn.free_arg = (caddr_t)bdesc; 2364 bdesc->xnfp = xnfp; 2365 bdesc->buf_phys = dma_cookie.dmac_laddress; 2366 bdesc->buf_mfn = pfn_to_mfn(xnf_btop(bdesc->buf_phys)); 2367 bdesc->len = dma_cookie.dmac_size; 2368 bdesc->grant_ref = INVALID_GRANT_REF; 2369 bdesc->gen = xnfp->xnf_gen; 2370 2371 atomic_add_64(&xnfp->xnf_stat_buf_allocated, 1); 2372 2373 return (0); 2374 2375 failure_2: 2376 ddi_dma_mem_free(&bdesc->acc_handle); 2377 2378 failure_1: 2379 ddi_dma_free_handle(&bdesc->dma_handle); 2380 2381 failure: 2382 2383 return (-1); 2384 } 2385 2386 static void 2387 xnf_buf_destructor(void *buf, void *arg) 2388 { 2389 xnf_buf_t *bdesc = buf; 2390 xnf_t *xnfp = arg; 2391 2392 (void) ddi_dma_unbind_handle(bdesc->dma_handle); 2393 ddi_dma_mem_free(&bdesc->acc_handle); 2394 ddi_dma_free_handle(&bdesc->dma_handle); 2395 2396 atomic_add_64(&xnfp->xnf_stat_buf_allocated, -1); 2397 } 2398 2399 static xnf_buf_t * 2400 xnf_buf_get(xnf_t *xnfp, int flags, boolean_t readonly) 2401 { 2402 grant_ref_t gref; 2403 xnf_buf_t *bufp; 2404 2405 /* 2406 * Usually grant references are more scarce than memory, so we 2407 * attempt to acquire a grant reference first. 2408 */ 2409 gref = gref_get(xnfp); 2410 if (gref == INVALID_GRANT_REF) 2411 return (NULL); 2412 2413 bufp = kmem_cache_alloc(xnfp->xnf_buf_cache, flags); 2414 if (bufp == NULL) { 2415 gref_put(xnfp, gref); 2416 return (NULL); 2417 } 2418 2419 ASSERT(bufp->grant_ref == INVALID_GRANT_REF); 2420 2421 bufp->grant_ref = gref; 2422 2423 if (bufp->gen != xnfp->xnf_gen) 2424 xnf_buf_refresh(bufp); 2425 2426 gnttab_grant_foreign_access_ref(bufp->grant_ref, 2427 xvdi_get_oeid(bufp->xnfp->xnf_devinfo), 2428 bufp->buf_mfn, readonly ? 1 : 0); 2429 2430 atomic_add_64(&xnfp->xnf_stat_buf_outstanding, 1); 2431 2432 return (bufp); 2433 } 2434 2435 static void 2436 xnf_buf_put(xnf_t *xnfp, xnf_buf_t *bufp, boolean_t readonly) 2437 { 2438 if (bufp->grant_ref != INVALID_GRANT_REF) { 2439 (void) gnttab_end_foreign_access_ref( 2440 bufp->grant_ref, readonly ? 1 : 0); 2441 gref_put(xnfp, bufp->grant_ref); 2442 bufp->grant_ref = INVALID_GRANT_REF; 2443 } 2444 2445 kmem_cache_free(xnfp->xnf_buf_cache, bufp); 2446 2447 atomic_add_64(&xnfp->xnf_stat_buf_outstanding, -1); 2448 } 2449 2450 /* 2451 * Refresh any cached data about a buffer after resume. 2452 */ 2453 static void 2454 xnf_buf_refresh(xnf_buf_t *bdesc) 2455 { 2456 bdesc->buf_mfn = pfn_to_mfn(xnf_btop(bdesc->buf_phys)); 2457 bdesc->gen = bdesc->xnfp->xnf_gen; 2458 } 2459 2460 /* 2461 * Streams `freeb' routine for `xnf_buf_t' when used as transmit 2462 * look-aside buffers. 
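 *
 * It is also the `free_rtn' routine (registered in
 * xnf_buf_constructor()) for receive buffers that xnf_rx_collect()
 * loans to the stack via desballoc(), so a loaned buffer finds its
 * way back to the kmem cache through xnf_buf_put() once the
 * upstream consumer frees it.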
2463 */ 2464 static void 2465 xnf_buf_recycle(xnf_buf_t *bdesc) 2466 { 2467 xnf_t *xnfp = bdesc->xnfp; 2468 2469 xnf_buf_put(xnfp, bdesc, B_TRUE); 2470 } 2471 2472 static int 2473 xnf_tx_buf_constructor(void *buf, void *arg, int kmflag) 2474 { 2475 _NOTE(ARGUNUSED(kmflag)); 2476 xnf_txbuf_t *txp = buf; 2477 xnf_t *xnfp = arg; 2478 2479 if (ddi_dma_alloc_handle(xnfp->xnf_devinfo, &buf_dma_attr, 2480 0, 0, &txp->tx_dma_handle) != DDI_SUCCESS) 2481 return (-1); 2482 2483 return (0); 2484 } 2485 2486 static void 2487 xnf_tx_buf_destructor(void *buf, void *arg) 2488 { 2489 _NOTE(ARGUNUSED(arg)); 2490 xnf_txbuf_t *txp = buf; 2491 2492 ddi_dma_free_handle(&txp->tx_dma_handle); 2493 } 2494 2495 /* 2496 * Statistics. 2497 */ 2498 static char *xnf_aux_statistics[] = { 2499 "tx_cksum_deferred", 2500 "rx_cksum_no_need", 2501 "interrupts", 2502 "unclaimed_interrupts", 2503 "tx_pullup", 2504 "tx_pagebndry", 2505 "tx_attempt", 2506 "buf_allocated", 2507 "buf_outstanding", 2508 "gref_outstanding", 2509 "gref_failure", 2510 "gref_peak", 2511 "rx_allocb_fail", 2512 "rx_desballoc_fail", 2513 }; 2514 2515 static int 2516 xnf_kstat_aux_update(kstat_t *ksp, int flag) 2517 { 2518 xnf_t *xnfp; 2519 kstat_named_t *knp; 2520 2521 if (flag != KSTAT_READ) 2522 return (EACCES); 2523 2524 xnfp = ksp->ks_private; 2525 knp = ksp->ks_data; 2526 2527 /* 2528 * Assignment order must match that of the names in 2529 * xnf_aux_statistics. 2530 */ 2531 (knp++)->value.ui64 = xnfp->xnf_stat_tx_cksum_deferred; 2532 (knp++)->value.ui64 = xnfp->xnf_stat_rx_cksum_no_need; 2533 2534 (knp++)->value.ui64 = xnfp->xnf_stat_interrupts; 2535 (knp++)->value.ui64 = xnfp->xnf_stat_unclaimed_interrupts; 2536 (knp++)->value.ui64 = xnfp->xnf_stat_tx_pullup; 2537 (knp++)->value.ui64 = xnfp->xnf_stat_tx_pagebndry; 2538 (knp++)->value.ui64 = xnfp->xnf_stat_tx_attempt; 2539 2540 (knp++)->value.ui64 = xnfp->xnf_stat_buf_allocated; 2541 (knp++)->value.ui64 = xnfp->xnf_stat_buf_outstanding; 2542 (knp++)->value.ui64 = xnfp->xnf_stat_gref_outstanding; 2543 (knp++)->value.ui64 = xnfp->xnf_stat_gref_failure; 2544 (knp++)->value.ui64 = xnfp->xnf_stat_gref_peak; 2545 (knp++)->value.ui64 = xnfp->xnf_stat_rx_allocb_fail; 2546 (knp++)->value.ui64 = xnfp->xnf_stat_rx_desballoc_fail; 2547 2548 return (0); 2549 } 2550 2551 static boolean_t 2552 xnf_kstat_init(xnf_t *xnfp) 2553 { 2554 int nstat = sizeof (xnf_aux_statistics) / 2555 sizeof (xnf_aux_statistics[0]); 2556 char **cp = xnf_aux_statistics; 2557 kstat_named_t *knp; 2558 2559 /* 2560 * Create and initialise kstats. 
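 *
 * The number of named statistics is taken from xnf_aux_statistics[]
 * and xnf_kstat_aux_update() must assign values in exactly that
 * order. Once installed, the counters can be read from userland
 * with kstat(1M), e.g. `kstat -m xnf -n aux_statistics', or with
 * libkstat; a minimal sketch (assuming instance 0 and ignoring most
 * error handling):
 *
 *	kstat_ctl_t *kc = kstat_open();
 *	kstat_t *ksp = kstat_lookup(kc, "xnf", 0, "aux_statistics");
 *	if (ksp != NULL && kstat_read(kc, ksp, NULL) != -1) {
 *		kstat_named_t *knp = kstat_data_lookup(ksp, "interrupts");
 *		if (knp != NULL)
 *			(void) printf("%llu\n",
 *			    (u_longlong_t)knp->value.ui64);
 *	}
 *	(void) kstat_close(kc);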
2561 */ 2562 if ((xnfp->xnf_kstat_aux = kstat_create("xnf", 2563 ddi_get_instance(xnfp->xnf_devinfo), 2564 "aux_statistics", "net", KSTAT_TYPE_NAMED, 2565 nstat, 0)) == NULL) 2566 return (B_FALSE); 2567 2568 xnfp->xnf_kstat_aux->ks_private = xnfp; 2569 xnfp->xnf_kstat_aux->ks_update = xnf_kstat_aux_update; 2570 2571 knp = xnfp->xnf_kstat_aux->ks_data; 2572 while (nstat > 0) { 2573 kstat_named_init(knp, *cp, KSTAT_DATA_UINT64); 2574 2575 knp++; 2576 cp++; 2577 nstat--; 2578 } 2579 2580 kstat_install(xnfp->xnf_kstat_aux); 2581 2582 return (B_TRUE); 2583 } 2584 2585 static int 2586 xnf_stat(void *arg, uint_t stat, uint64_t *val) 2587 { 2588 xnf_t *xnfp = arg; 2589 2590 mutex_enter(&xnfp->xnf_rxlock); 2591 mutex_enter(&xnfp->xnf_txlock); 2592 2593 #define mac_stat(q, r) \ 2594 case (MAC_STAT_##q): \ 2595 *val = xnfp->xnf_stat_##r; \ 2596 break 2597 2598 #define ether_stat(q, r) \ 2599 case (ETHER_STAT_##q): \ 2600 *val = xnfp->xnf_stat_##r; \ 2601 break 2602 2603 switch (stat) { 2604 2605 mac_stat(IPACKETS, ipackets); 2606 mac_stat(OPACKETS, opackets); 2607 mac_stat(RBYTES, rbytes); 2608 mac_stat(OBYTES, obytes); 2609 mac_stat(NORCVBUF, norxbuf); 2610 mac_stat(IERRORS, errrx); 2611 mac_stat(NOXMTBUF, tx_defer); 2612 2613 ether_stat(MACRCV_ERRORS, mac_rcv_error); 2614 ether_stat(TOOSHORT_ERRORS, runt); 2615 2616 /* always claim to be in full duplex mode */ 2617 case ETHER_STAT_LINK_DUPLEX: 2618 *val = LINK_DUPLEX_FULL; 2619 break; 2620 2621 /* always claim to be at 1Gb/s link speed */ 2622 case MAC_STAT_IFSPEED: 2623 *val = 1000000000ull; 2624 break; 2625 2626 default: 2627 mutex_exit(&xnfp->xnf_txlock); 2628 mutex_exit(&xnfp->xnf_rxlock); 2629 2630 return (ENOTSUP); 2631 } 2632 2633 #undef mac_stat 2634 #undef ether_stat 2635 2636 mutex_exit(&xnfp->xnf_txlock); 2637 mutex_exit(&xnfp->xnf_rxlock); 2638 2639 return (0); 2640 } 2641 2642 static boolean_t 2643 xnf_getcapab(void *arg, mac_capab_t cap, void *cap_data) 2644 { 2645 _NOTE(ARGUNUSED(arg)); 2646 2647 switch (cap) { 2648 case MAC_CAPAB_HCKSUM: { 2649 uint32_t *capab = cap_data; 2650 2651 /* 2652 * Whilst the flag used to communicate with the IO 2653 * domain is called "NETTXF_csum_blank", the checksum 2654 * in the packet must contain the pseudo-header 2655 * checksum and not zero. 2656 * 2657 * To help out the IO domain, we might use 2658 * HCKSUM_INET_PARTIAL. Unfortunately our stack will 2659 * then use checksum offload for IPv6 packets, which 2660 * the IO domain can't handle. 2661 * 2662 * As a result, we declare ourselves capable of 2663 * HCKSUM_INET_FULL_V4. This means that we receive 2664 * IPv4 packets from the stack with a blank checksum 2665 * field and must insert the pseudo-header checksum 2666 * before passing the packet to the IO domain. 2667 */ 2668 *capab = HCKSUM_INET_FULL_V4; 2669 break; 2670 } 2671 default: 2672 return (B_FALSE); 2673 } 2674 2675 return (B_TRUE); 2676 } 2677 2678 /* 2679 * The state of the peer has changed - react accordingly.
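 *
 * Only two backend state transitions require action here:
 * XenbusStateInitWait, where we read the backend configuration,
 * insist on 'feature-rx-copy' and connect to the backend, and
 * XenbusStateConnected, where we mark ourselves connected, wake any
 * waiting transmitters, kick the ring, report the link as up and
 * re-advertise our multicast addresses. All other states are
 * ignored.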
2680 */ 2681 static void 2682 oe_state_change(dev_info_t *dip, ddi_eventcookie_t id, 2683 void *arg, void *impl_data) 2684 { 2685 _NOTE(ARGUNUSED(id, arg)); 2686 xnf_t *xnfp = ddi_get_driver_private(dip); 2687 XenbusState new_state = *(XenbusState *)impl_data; 2688 2689 ASSERT(xnfp != NULL); 2690 2691 switch (new_state) { 2692 case XenbusStateUnknown: 2693 case XenbusStateInitialising: 2694 case XenbusStateInitialised: 2695 case XenbusStateClosing: 2696 case XenbusStateClosed: 2697 case XenbusStateReconfiguring: 2698 case XenbusStateReconfigured: 2699 break; 2700 2701 case XenbusStateInitWait: 2702 xnf_read_config(xnfp); 2703 2704 if (!xnfp->xnf_be_rx_copy) { 2705 cmn_err(CE_WARN, 2706 "The xnf driver requires a dom0 that " 2707 "supports 'feature-rx-copy'."); 2708 (void) xvdi_switch_state(xnfp->xnf_devinfo, 2709 XBT_NULL, XenbusStateClosed); 2710 break; 2711 } 2712 2713 /* 2714 * Connect to the backend. 2715 */ 2716 xnf_be_connect(xnfp); 2717 2718 /* 2719 * Tell the MAC layer our MAC address, as discovered by xnf_read_config(). 2720 */ 2721 mac_unicst_update(xnfp->xnf_mh, xnfp->xnf_mac_addr); 2722 2723 break; 2724 2725 case XenbusStateConnected: 2726 mutex_enter(&xnfp->xnf_rxlock); 2727 mutex_enter(&xnfp->xnf_txlock); 2728 2729 xnfp->xnf_connected = B_TRUE; 2730 /* 2731 * Wake up any threads waiting to send data to 2732 * the backend. 2733 */ 2734 cv_broadcast(&xnfp->xnf_cv_state); 2735 2736 mutex_exit(&xnfp->xnf_txlock); 2737 mutex_exit(&xnfp->xnf_rxlock); 2738 2739 /* 2740 * Kick the peer in case it missed any transmit 2741 * requests in the TX ring. 2742 */ 2743 ec_notify_via_evtchn(xnfp->xnf_evtchn); 2744 2745 /* 2746 * There may already be completed receive requests in 2747 * the ring sent by the backend after it gets connected 2748 * but before we see its state change here, so we call 2749 * xnf_intr() to handle them, if any. 2750 */ 2751 (void) xnf_intr((caddr_t)xnfp); 2752 2753 /* 2754 * Mark the link up now that we are connected. 2755 */ 2756 mac_link_update(xnfp->xnf_mh, LINK_STATE_UP); 2757 2758 /* 2759 * Tell the backend about the multicast addresses in 2760 * which we are interested. 2761 */ 2762 mac_multicast_refresh(xnfp->xnf_mh, NULL, xnfp, B_TRUE); 2763 2764 break; 2765 2766 default: 2767 break; 2768 } 2769 } 2770
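/*
 * Debugging note: the receive path defines several SDT probes
 * (xnf_rx_got_rsp, xnf_rx_packet, etc. in xnf_rx_collect()) that can
 * be examined with dtrace(1M). A minimal sketch, assuming the usual
 * sdt provider naming for DTRACE_PROBE4() and that arg0 of
 * xnf_rx_packet is the packet length:
 *
 *	# dtrace -n 'sdt:xnf::xnf_rx_packet { @len = quantize(arg0); }'
 */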