/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 *
 * Copyright (c) 2004 Christian Limpach.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. This section intentionally left blank.
 * 4. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
/*
 * Section 3 of the above license was updated in response to bug 6379571.
 */

/*
 * xnf.c - Nemo-based network driver for domU
 */

#include <sys/types.h>
#include <sys/errno.h>
#include <sys/param.h>
#include <sys/sysmacros.h>
#include <sys/systm.h>
#include <sys/stream.h>
#include <sys/strsubr.h>
#include <sys/conf.h>
#include <sys/ddi.h>
#include <sys/devops.h>
#include <sys/sunddi.h>
#include <sys/sunndi.h>
#include <sys/dlpi.h>
#include <sys/ethernet.h>
#include <sys/strsun.h>
#include <sys/pattr.h>
#include <inet/ip.h>
#include <inet/ip_impl.h>
#include <sys/gld.h>
#include <sys/modctl.h>
#include <sys/mac_provider.h>
#include <sys/mac_ether.h>
#include <sys/bootinfo.h>
#include <sys/mach_mmu.h>
#ifdef XPV_HVM_DRIVER
#include <sys/xpv_support.h>
#include <sys/hypervisor.h>
#else
#include <sys/hypervisor.h>
#include <sys/evtchn_impl.h>
#include <sys/balloon_impl.h>
#endif
#include <xen/public/io/netif.h>
#include <sys/gnttab.h>
#include <xen/sys/xendev.h>
#include <sys/sdt.h>

#include <io/xnf.h>


/*
 * Declarations and Module Linkage
 */

#if defined(DEBUG) || defined(__lint)
#define	XNF_DEBUG
int	xnfdebug = 0;
#endif

/*
 * On a 32 bit PAE system physical and machine addresses are larger
 * than 32 bits.  ddi_btop() on such systems takes an unsigned long
 * argument, and so addresses above 4G are truncated before ddi_btop()
 * gets to see them.  To avoid this, code the shift operation here.
 */
#define	xnf_btop(addr)	((addr) >> PAGESHIFT)

boolean_t	xnf_cksum_offload = B_TRUE;

/* Default value for hypervisor-based copy operations */
boolean_t	xnf_rx_hvcopy = B_TRUE;

/*
 * Should pages used for transmit be readonly for the peer?
 */
boolean_t	xnf_tx_pages_readonly = B_FALSE;
/*
 * Packets under this size are bcopied instead of using desballoc.
 * Choose a value > XNF_FRAMESIZE (1514) to force the receive path to
 * always copy.
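 *
 * (Illustrative note, not from the original source: tunables such as
 * this are patchable globals and would typically be overridden at boot
 * via /etc/system, e.g.
 *
 *	set xnf:xnf_rx_bcopy_thresh = 2048
 *
 * to force copying for all frame sizes.)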
 */
unsigned int	xnf_rx_bcopy_thresh = 64;

unsigned int	xnf_max_tx_frags = 1;

/* Required system entry points */
static int	xnf_attach(dev_info_t *, ddi_attach_cmd_t);
static int	xnf_detach(dev_info_t *, ddi_detach_cmd_t);

/* Required driver entry points for Nemo */
static int	xnf_start(void *);
static void	xnf_stop(void *);
static int	xnf_set_mac_addr(void *, const uint8_t *);
static int	xnf_set_multicast(void *, boolean_t, const uint8_t *);
static int	xnf_set_promiscuous(void *, boolean_t);
static mblk_t	*xnf_send(void *, mblk_t *);
static uint_t	xnf_intr(caddr_t);
static int	xnf_stat(void *, uint_t, uint64_t *);
static void	xnf_ioctl(void *, queue_t *, mblk_t *);
static boolean_t xnf_getcapab(void *, mac_capab_t, void *);

/* Driver private functions */
static int xnf_alloc_dma_resources(xnf_t *);
static void xnf_release_dma_resources(xnf_t *);
static mblk_t *xnf_process_recv(xnf_t *);
static void xnf_rcv_complete(struct xnf_buffer_desc *);
static void xnf_release_mblks(xnf_t *);
static struct xnf_buffer_desc *xnf_alloc_tx_buffer(xnf_t *);
static struct xnf_buffer_desc *xnf_alloc_buffer(xnf_t *);
static struct xnf_buffer_desc *xnf_get_tx_buffer(xnf_t *);
static struct xnf_buffer_desc *xnf_get_buffer(xnf_t *);
static void xnf_free_buffer(struct xnf_buffer_desc *);
static void xnf_free_tx_buffer(struct xnf_buffer_desc *);
void xnf_send_driver_status(int, int);
static void rx_buffer_hang(xnf_t *, struct xnf_buffer_desc *);
static int xnf_clean_tx_ring(xnf_t *);
static void oe_state_change(dev_info_t *, ddi_eventcookie_t,
    void *, void *);
static mblk_t *xnf_process_hvcopy_recv(xnf_t *xnfp);
static boolean_t xnf_hvcopy_peer_status(dev_info_t *devinfo);
static boolean_t xnf_kstat_init(xnf_t *xnfp);

/*
 * XXPV dme: remove MC_IOCTL?
 */
static mac_callbacks_t xnf_callbacks = {
	MC_IOCTL | MC_GETCAPAB,
	xnf_stat,
	xnf_start,
	xnf_stop,
	xnf_set_promiscuous,
	xnf_set_multicast,
	xnf_set_mac_addr,
	xnf_send,
	xnf_ioctl,
	xnf_getcapab
};

#define	GRANT_INVALID_REF	0
const int xnf_rx_bufs_lowat = 4 * NET_RX_RING_SIZE;
const int xnf_rx_bufs_hiwat = 8 * NET_RX_RING_SIZE; /* default max */

/* DMA attributes for network ring buffer */
static ddi_dma_attr_t ringbuf_dma_attr = {
	DMA_ATTR_V0,		/* version of this structure */
	0,			/* lowest usable address */
	0xffffffffffffffffULL,	/* highest usable address */
	0x7fffffff,		/* maximum DMAable byte count */
	MMU_PAGESIZE,		/* alignment in bytes */
	0x7ff,			/* bitmap of burst sizes */
	1,			/* minimum transfer */
	0xffffffffU,		/* maximum transfer */
	0xffffffffffffffffULL,	/* maximum segment length */
	1,			/* maximum number of segments */
	1,			/* granularity */
	0,			/* flags (reserved) */
};

/* DMA attributes for transmit data */
static ddi_dma_attr_t tx_buffer_dma_attr = {
	DMA_ATTR_V0,		/* version of this structure */
	0,			/* lowest usable address */
	0xffffffffffffffffULL,	/* highest usable address */
	0x7fffffff,		/* maximum DMAable byte count */
	MMU_PAGESIZE,		/* alignment in bytes */
	0x7ff,			/* bitmap of burst sizes */
	1,			/* minimum transfer */
	0xffffffffU,		/* maximum transfer */
	0xffffffffffffffffULL,	/* maximum segment length */
	1,			/* maximum number of segments */
	1,			/* granularity */
	0,			/* flags (reserved) */
};

/* DMA attributes for a receive buffer */
static ddi_dma_attr_t rx_buffer_dma_attr = {
	DMA_ATTR_V0,		/* version of this structure */
	0,			/* lowest usable address */
	0xffffffffffffffffULL,	/* highest usable address */
	0x7fffffff,		/* maximum DMAable byte count */
	MMU_PAGESIZE,		/* alignment in bytes */
	0x7ff,			/* bitmap of burst sizes */
	1,			/* minimum transfer */
	0xffffffffU,		/* maximum transfer */
	0xffffffffffffffffULL,	/* maximum segment length */
	1,			/* maximum number of segments */
	1,			/* granularity */
	0,			/* flags (reserved) */
};

/* DMA access attributes for registers and descriptors */
static ddi_device_acc_attr_t accattr = {
	DDI_DEVICE_ATTR_V0,
	DDI_STRUCTURE_LE_ACC,	/* This is a little-endian device */
	DDI_STRICTORDER_ACC
};

/* DMA access attributes for data: NOT to be byte swapped.
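 * (The descriptor rings above use DDI_STRUCTURE_LE_ACC; packet payload
 * is passed through unmodified, hence DDI_NEVERSWAP_ACC here.)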
 */
static ddi_device_acc_attr_t data_accattr = {
	DDI_DEVICE_ATTR_V0,
	DDI_NEVERSWAP_ACC,
	DDI_STRICTORDER_ACC
};

unsigned char xnf_broadcastaddr[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
int xnf_diagnose = 0; /* Patchable global for diagnostic purposes */

DDI_DEFINE_STREAM_OPS(xnf_dev_ops, nulldev, nulldev, xnf_attach, xnf_detach,
    nodev, NULL, D_MP, NULL, ddi_quiesce_not_supported);

static struct modldrv xnf_modldrv = {
	&mod_driverops,
	"Virtual Ethernet driver",
	&xnf_dev_ops
};

static struct modlinkage modlinkage = {
	MODREV_1, &xnf_modldrv, NULL
};

int
_init(void)
{
	int r;

	mac_init_ops(&xnf_dev_ops, "xnf");
	r = mod_install(&modlinkage);
	if (r != DDI_SUCCESS)
		mac_fini_ops(&xnf_dev_ops);

	return (r);
}

int
_fini(void)
{
	return (EBUSY); /* XXPV dme: should be removable */
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}

static int
xnf_setup_rings(xnf_t *xnfp)
{
	int			ix, err;
	RING_IDX		i;
	struct xnf_buffer_desc	*bdesc, *rbp;
	struct xenbus_device	*xsd;
	domid_t			oeid;

	oeid = xvdi_get_oeid(xnfp->xnf_devinfo);
	xsd = xvdi_get_xsd(xnfp->xnf_devinfo);

	if (xnfp->xnf_tx_ring_ref != GRANT_INVALID_REF)
		gnttab_end_foreign_access(xnfp->xnf_tx_ring_ref, 0, 0);

	err = gnttab_grant_foreign_access(oeid,
	    xnf_btop(pa_to_ma(xnfp->xnf_tx_ring_phys_addr)), 0);
	if (err <= 0) {
		err = -err;
		xenbus_dev_error(xsd, err, "granting access to tx ring page");
		goto out;
	}
	xnfp->xnf_tx_ring_ref = (grant_ref_t)err;

	if (xnfp->xnf_rx_ring_ref != GRANT_INVALID_REF)
		gnttab_end_foreign_access(xnfp->xnf_rx_ring_ref, 0, 0);

	err = gnttab_grant_foreign_access(oeid,
	    xnf_btop(pa_to_ma(xnfp->xnf_rx_ring_phys_addr)), 0);
	if (err <= 0) {
		err = -err;
		xenbus_dev_error(xsd, err, "granting access to rx ring page");
		goto out;
	}
	xnfp->xnf_rx_ring_ref = (grant_ref_t)err;


	mutex_enter(&xnfp->xnf_intrlock);

	/*
	 * Cleanup the TX ring.  We just clean up any valid tx_pktinfo structs
	 * and reset the ring.  Note that this can lose packets after a resume,
	 * but we expect to stagger on.
	 */
	mutex_enter(&xnfp->xnf_txlock);

	for (i = 0; i < xnfp->xnf_n_tx; i++) {
		struct tx_pktinfo *txp = &xnfp->xnf_tx_pkt_info[i];

		txp->id = i + 1;

		if (txp->grant_ref == GRANT_INVALID_REF) {
			ASSERT(txp->mp == NULL);
			ASSERT(txp->bdesc == NULL);
			continue;
		}

		if (gnttab_query_foreign_access(txp->grant_ref) != 0)
			panic("tx grant still in use by backend domain");

		freemsg(txp->mp);
		txp->mp = NULL;

		(void) ddi_dma_unbind_handle(txp->dma_handle);

		if (txp->bdesc != NULL) {
			xnf_free_tx_buffer(txp->bdesc);
			txp->bdesc = NULL;
		}

		(void) gnttab_end_foreign_access_ref(txp->grant_ref,
		    xnfp->xnf_tx_pages_readonly);
		gnttab_release_grant_reference(&xnfp->xnf_gref_tx_head,
		    txp->grant_ref);
		txp->grant_ref = GRANT_INVALID_REF;
	}

	xnfp->xnf_tx_pkt_id_list = 0;
	xnfp->xnf_tx_ring.rsp_cons = 0;
	xnfp->xnf_tx_ring.req_prod_pvt = 0;

	/* LINTED: constant in conditional context */
	SHARED_RING_INIT(xnfp->xnf_tx_ring.sring);

	mutex_exit(&xnfp->xnf_txlock);

	/*
	 * Rebuild the RX ring.
We have to rebuild the RX ring because some of 386 * our pages are currently flipped out/granted so we can't just free 387 * the RX buffers. Reclaim any unprocessed recv buffers, they won't be 388 * useable anyway since the mfn's they refer to are no longer valid. 389 * Grant the backend domain access to each hung rx buffer. 390 */ 391 i = xnfp->xnf_rx_ring.rsp_cons; 392 while (i++ != xnfp->xnf_rx_ring.sring->req_prod) { 393 volatile netif_rx_request_t *rxrp; 394 395 rxrp = RING_GET_REQUEST(&xnfp->xnf_rx_ring, i); 396 ix = rxrp - RING_GET_REQUEST(&xnfp->xnf_rx_ring, 0); 397 rbp = xnfp->xnf_rxpkt_bufptr[ix]; 398 if (rbp != NULL) { 399 grant_ref_t ref = rbp->grant_ref; 400 401 ASSERT(ref != GRANT_INVALID_REF); 402 if (xnfp->xnf_rx_hvcopy) { 403 pfn_t pfn = xnf_btop(rbp->buf_phys); 404 mfn_t mfn = pfn_to_mfn(pfn); 405 406 gnttab_grant_foreign_access_ref(ref, oeid, 407 mfn, 0); 408 } else { 409 gnttab_grant_foreign_transfer_ref(ref, 410 oeid, 0); 411 } 412 rxrp->id = ix; 413 rxrp->gref = ref; 414 } 415 } 416 417 /* 418 * Reset the ring pointers to initial state. 419 * Hang buffers for any empty ring slots. 420 */ 421 xnfp->xnf_rx_ring.rsp_cons = 0; 422 xnfp->xnf_rx_ring.req_prod_pvt = 0; 423 424 /* LINTED: constant in conditional context */ 425 SHARED_RING_INIT(xnfp->xnf_rx_ring.sring); 426 427 for (i = 0; i < NET_RX_RING_SIZE; i++) { 428 xnfp->xnf_rx_ring.req_prod_pvt = i; 429 if (xnfp->xnf_rxpkt_bufptr[i] != NULL) 430 continue; 431 if ((bdesc = xnf_get_buffer(xnfp)) == NULL) 432 break; 433 rx_buffer_hang(xnfp, bdesc); 434 } 435 xnfp->xnf_rx_ring.req_prod_pvt = i; 436 /* LINTED: constant in conditional context */ 437 RING_PUSH_REQUESTS(&xnfp->xnf_rx_ring); 438 439 mutex_exit(&xnfp->xnf_intrlock); 440 441 return (0); 442 443 out: 444 if (xnfp->xnf_tx_ring_ref != GRANT_INVALID_REF) 445 gnttab_end_foreign_access(xnfp->xnf_tx_ring_ref, 0, 0); 446 xnfp->xnf_tx_ring_ref = GRANT_INVALID_REF; 447 448 if (xnfp->xnf_rx_ring_ref != GRANT_INVALID_REF) 449 gnttab_end_foreign_access(xnfp->xnf_rx_ring_ref, 0, 0); 450 xnfp->xnf_rx_ring_ref = GRANT_INVALID_REF; 451 452 return (err); 453 } 454 455 456 /* Called when the upper layers free a message we passed upstream */ 457 static void 458 xnf_copy_rcv_complete(struct xnf_buffer_desc *bdesc) 459 { 460 (void) ddi_dma_unbind_handle(bdesc->dma_handle); 461 ddi_dma_mem_free(&bdesc->acc_handle); 462 ddi_dma_free_handle(&bdesc->dma_handle); 463 kmem_free(bdesc, sizeof (*bdesc)); 464 } 465 466 467 /* 468 * Connect driver to back end, called to set up communication with 469 * back end driver both initially and on resume after restore/migrate. 
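 *
 * In outline, the frontend writes its half of the connection to
 * xenstore in a single transaction:
 *	tx-ring-ref, rx-ring-ref, event-channel,
 *	feature-rx-notify, feature-tx-writable (when tx pages are
 *	writable), feature-no-csum-offload, request-rx-copy,
 *	and finally state = XenbusStateConnected.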
470 */ 471 void 472 xnf_be_connect(xnf_t *xnfp) 473 { 474 const char *message; 475 xenbus_transaction_t xbt; 476 struct xenbus_device *xsd; 477 char *xsname; 478 int err; 479 480 ASSERT(!xnfp->xnf_connected); 481 482 xsd = xvdi_get_xsd(xnfp->xnf_devinfo); 483 xsname = xvdi_get_xsname(xnfp->xnf_devinfo); 484 485 err = xnf_setup_rings(xnfp); 486 if (err != 0) { 487 cmn_err(CE_WARN, "failed to set up tx/rx rings"); 488 xenbus_dev_error(xsd, err, "setting up ring"); 489 return; 490 } 491 492 again: 493 err = xenbus_transaction_start(&xbt); 494 if (err != 0) { 495 xenbus_dev_error(xsd, EIO, "starting transaction"); 496 return; 497 } 498 499 err = xenbus_printf(xbt, xsname, "tx-ring-ref", "%u", 500 xnfp->xnf_tx_ring_ref); 501 if (err != 0) { 502 message = "writing tx ring-ref"; 503 goto abort_transaction; 504 } 505 506 err = xenbus_printf(xbt, xsname, "rx-ring-ref", "%u", 507 xnfp->xnf_rx_ring_ref); 508 if (err != 0) { 509 message = "writing rx ring-ref"; 510 goto abort_transaction; 511 } 512 513 err = xenbus_printf(xbt, xsname, "event-channel", "%u", 514 xnfp->xnf_evtchn); 515 if (err != 0) { 516 message = "writing event-channel"; 517 goto abort_transaction; 518 } 519 520 err = xenbus_printf(xbt, xsname, "feature-rx-notify", "%d", 1); 521 if (err != 0) { 522 message = "writing feature-rx-notify"; 523 goto abort_transaction; 524 } 525 526 if (!xnfp->xnf_tx_pages_readonly) { 527 err = xenbus_printf(xbt, xsname, "feature-tx-writable", 528 "%d", 1); 529 if (err != 0) { 530 message = "writing feature-tx-writable"; 531 goto abort_transaction; 532 } 533 } 534 535 err = xenbus_printf(xbt, xsname, "feature-no-csum-offload", "%d", 536 xnfp->xnf_cksum_offload ? 0 : 1); 537 if (err != 0) { 538 message = "writing feature-no-csum-offload"; 539 goto abort_transaction; 540 } 541 err = xenbus_printf(xbt, xsname, "request-rx-copy", "%d", 542 xnfp->xnf_rx_hvcopy ? 1 : 0); 543 if (err != 0) { 544 message = "writing request-rx-copy"; 545 goto abort_transaction; 546 } 547 548 err = xenbus_printf(xbt, xsname, "state", "%d", XenbusStateConnected); 549 if (err != 0) { 550 message = "writing frontend XenbusStateConnected"; 551 goto abort_transaction; 552 } 553 554 err = xenbus_transaction_end(xbt, 0); 555 if (err != 0) { 556 if (err == EAGAIN) 557 goto again; 558 xenbus_dev_error(xsd, err, "completing transaction"); 559 } 560 561 return; 562 563 abort_transaction: 564 (void) xenbus_transaction_end(xbt, 1); 565 xenbus_dev_error(xsd, err, "%s", message); 566 } 567 568 /* 569 * Read config info from xenstore 570 */ 571 void 572 xnf_read_config(xnf_t *xnfp) 573 { 574 char mac[ETHERADDRL * 3]; 575 int err, be_no_cksum_offload; 576 577 err = xenbus_scanf(XBT_NULL, xvdi_get_oename(xnfp->xnf_devinfo), "mac", 578 "%s", (char *)&mac[0]); 579 if (err != 0) { 580 /* 581 * bad: we're supposed to be set up with a proper mac 582 * addr. at this point 583 */ 584 cmn_err(CE_WARN, "%s%d: no mac address", 585 ddi_driver_name(xnfp->xnf_devinfo), 586 ddi_get_instance(xnfp->xnf_devinfo)); 587 return; 588 } 589 if (ether_aton(mac, xnfp->xnf_mac_addr) != ETHERADDRL) { 590 err = ENOENT; 591 xenbus_dev_error(xvdi_get_xsd(xnfp->xnf_devinfo), ENOENT, 592 "parsing %s/mac", xvdi_get_xsname(xnfp->xnf_devinfo)); 593 return; 594 } 595 596 err = xenbus_scanf(XBT_NULL, xvdi_get_oename(xnfp->xnf_devinfo), 597 "feature-no-csum-offload", "%d", &be_no_cksum_offload); 598 /* 599 * If we fail to read the store we assume that the key is 600 * absent, implying an older domain at the far end. Older 601 * domains always support checksum offload. 
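	 *
	 * Net effect of the checks below (summarised):
	 *	backend writes feature-no-csum-offload=1 -> offload disabled
	 *	xnf_cksum_offload == B_FALSE              -> offload disabled
	 *	otherwise                                 -> offload stays on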
602 */ 603 if (err != 0) 604 be_no_cksum_offload = 0; 605 /* 606 * If the far end cannot do checksum offload or we do not wish 607 * to do it, disable it. 608 */ 609 if ((be_no_cksum_offload == 1) || !xnfp->xnf_cksum_offload) 610 xnfp->xnf_cksum_offload = B_FALSE; 611 } 612 613 /* 614 * attach(9E) -- Attach a device to the system 615 * 616 * Called once for each board successfully probed. 617 */ 618 static int 619 xnf_attach(dev_info_t *devinfo, ddi_attach_cmd_t cmd) 620 { 621 mac_register_t *macp; 622 xnf_t *xnfp; 623 int err; 624 625 #ifdef XNF_DEBUG 626 if (xnfdebug & XNF_DEBUG_DDI) 627 printf("xnf%d: attach(0x%p)\n", ddi_get_instance(devinfo), 628 (void *)devinfo); 629 #endif 630 631 switch (cmd) { 632 case DDI_RESUME: 633 xnfp = ddi_get_driver_private(devinfo); 634 635 (void) xvdi_resume(devinfo); 636 (void) xvdi_alloc_evtchn(devinfo); 637 xnfp->xnf_evtchn = xvdi_get_evtchn(devinfo); 638 #ifdef XPV_HVM_DRIVER 639 ec_bind_evtchn_to_handler(xnfp->xnf_evtchn, IPL_VIF, xnf_intr, 640 xnfp); 641 #else 642 (void) ddi_add_intr(devinfo, 0, NULL, NULL, xnf_intr, 643 (caddr_t)xnfp); 644 #endif 645 xnf_be_connect(xnfp); 646 /* 647 * Our MAC address may have changed if we're resuming: 648 * - on a different host 649 * - on the same one and got a different MAC address 650 * because we didn't specify one of our own. 651 * so it's useful to claim that it changed in order that 652 * IP send out a gratuitous ARP. 653 */ 654 mac_unicst_update(xnfp->xnf_mh, xnfp->xnf_mac_addr); 655 return (DDI_SUCCESS); 656 657 case DDI_ATTACH: 658 break; 659 660 default: 661 return (DDI_FAILURE); 662 } 663 664 /* 665 * Allocate gld_mac_info_t and xnf_instance structures 666 */ 667 macp = mac_alloc(MAC_VERSION); 668 if (macp == NULL) 669 return (DDI_FAILURE); 670 xnfp = kmem_zalloc(sizeof (*xnfp), KM_SLEEP); 671 672 macp->m_dip = devinfo; 673 macp->m_driver = xnfp; 674 xnfp->xnf_devinfo = devinfo; 675 676 macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER; 677 macp->m_src_addr = xnfp->xnf_mac_addr; 678 macp->m_callbacks = &xnf_callbacks; 679 macp->m_min_sdu = 0; 680 macp->m_max_sdu = XNF_MAXPKT; 681 682 xnfp->xnf_running = B_FALSE; 683 xnfp->xnf_connected = B_FALSE; 684 xnfp->xnf_cksum_offload = xnf_cksum_offload; 685 xnfp->xnf_tx_pages_readonly = xnf_tx_pages_readonly; 686 xnfp->xnf_need_sched = B_FALSE; 687 688 xnfp->xnf_rx_hvcopy = xnf_hvcopy_peer_status(devinfo) && xnf_rx_hvcopy; 689 #ifdef XPV_HVM_DRIVER 690 /* 691 * Report our version to dom0. 692 */ 693 if (xenbus_printf(XBT_NULL, "guest/xnf", "version", "%d", 694 HVMPV_XNF_VERS)) 695 cmn_err(CE_WARN, "xnf: couldn't write version\n"); 696 697 if (!xnfp->xnf_rx_hvcopy) { 698 cmn_err(CE_WARN, "The xnf driver requires a dom0 that " 699 "supports 'feature-rx-copy'"); 700 goto failure; 701 } 702 #endif 703 704 /* 705 * Get the iblock cookie with which to initialize the mutexes. 706 */ 707 if (ddi_get_iblock_cookie(devinfo, 0, &xnfp->xnf_icookie) 708 != DDI_SUCCESS) 709 goto failure; 710 /* 711 * Driver locking strategy: the txlock protects all paths 712 * through the driver, except the interrupt thread. 713 * If the interrupt thread needs to do something which could 714 * affect the operation of any other part of the driver, 715 * it needs to acquire the txlock mutex. 
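	 *
	 * Where both locks are needed (suspend, start, stop, ring
	 * setup) the order used in this file is intrlock first, then
	 * txlock.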
716 */ 717 mutex_init(&xnfp->xnf_tx_buf_mutex, 718 NULL, MUTEX_DRIVER, xnfp->xnf_icookie); 719 mutex_init(&xnfp->xnf_rx_buf_mutex, 720 NULL, MUTEX_DRIVER, xnfp->xnf_icookie); 721 mutex_init(&xnfp->xnf_txlock, 722 NULL, MUTEX_DRIVER, xnfp->xnf_icookie); 723 mutex_init(&xnfp->xnf_intrlock, 724 NULL, MUTEX_DRIVER, xnfp->xnf_icookie); 725 cv_init(&xnfp->xnf_cv, NULL, CV_DEFAULT, NULL); 726 727 xnfp->xnf_gref_tx_head = (grant_ref_t)-1; 728 xnfp->xnf_gref_rx_head = (grant_ref_t)-1; 729 if (gnttab_alloc_grant_references(NET_TX_RING_SIZE, 730 &xnfp->xnf_gref_tx_head) < 0) { 731 cmn_err(CE_WARN, "xnf%d: can't alloc tx grant refs", 732 ddi_get_instance(xnfp->xnf_devinfo)); 733 goto failure_1; 734 } 735 if (gnttab_alloc_grant_references(NET_RX_RING_SIZE, 736 &xnfp->xnf_gref_rx_head) < 0) { 737 cmn_err(CE_WARN, "xnf%d: can't alloc rx grant refs", 738 ddi_get_instance(xnfp->xnf_devinfo)); 739 goto failure_1; 740 } 741 if (xnf_alloc_dma_resources(xnfp) == DDI_FAILURE) { 742 cmn_err(CE_WARN, "xnf%d: failed to allocate and initialize " 743 "driver data structures", 744 ddi_get_instance(xnfp->xnf_devinfo)); 745 goto failure_1; 746 } 747 748 xnfp->xnf_rx_ring.sring->rsp_event = 749 xnfp->xnf_tx_ring.sring->rsp_event = 1; 750 751 xnfp->xnf_tx_ring_ref = GRANT_INVALID_REF; 752 xnfp->xnf_rx_ring_ref = GRANT_INVALID_REF; 753 754 /* set driver private pointer now */ 755 ddi_set_driver_private(devinfo, xnfp); 756 757 if (xvdi_add_event_handler(devinfo, XS_OE_STATE, oe_state_change, NULL) 758 != DDI_SUCCESS) 759 goto failure_1; 760 761 if (!xnf_kstat_init(xnfp)) 762 goto failure_2; 763 764 /* 765 * Allocate an event channel, add the interrupt handler and 766 * bind it to the event channel. 767 */ 768 (void) xvdi_alloc_evtchn(devinfo); 769 xnfp->xnf_evtchn = xvdi_get_evtchn(devinfo); 770 #ifdef XPV_HVM_DRIVER 771 ec_bind_evtchn_to_handler(xnfp->xnf_evtchn, IPL_VIF, xnf_intr, xnfp); 772 #else 773 (void) ddi_add_intr(devinfo, 0, NULL, NULL, xnf_intr, (caddr_t)xnfp); 774 #endif 775 776 xnf_read_config(xnfp); 777 err = mac_register(macp, &xnfp->xnf_mh); 778 mac_free(macp); 779 macp = NULL; 780 if (err != 0) 781 goto failure_3; 782 783 #ifdef XPV_HVM_DRIVER 784 /* 785 * In the HVM case, this driver essentially replaces a driver for 786 * a 'real' PCI NIC. Without the "model" property set to 787 * "Ethernet controller", like the PCI code does, netbooting does 788 * not work correctly, as strplumb_get_netdev_path() will not find 789 * this interface. 
790 */ 791 (void) ndi_prop_update_string(DDI_DEV_T_NONE, devinfo, "model", 792 "Ethernet controller"); 793 #endif 794 795 /* 796 * connect to the backend 797 */ 798 xnf_be_connect(xnfp); 799 800 return (DDI_SUCCESS); 801 802 failure_3: 803 kstat_delete(xnfp->xnf_kstat_aux); 804 #ifdef XPV_HVM_DRIVER 805 ec_unbind_evtchn(xnfp->xnf_evtchn); 806 xvdi_free_evtchn(devinfo); 807 #else 808 ddi_remove_intr(devinfo, 0, xnfp->xnf_icookie); 809 #endif 810 xnfp->xnf_evtchn = INVALID_EVTCHN; 811 812 failure_2: 813 xvdi_remove_event_handler(devinfo, XS_OE_STATE); 814 815 failure_1: 816 if (xnfp->xnf_gref_tx_head != (grant_ref_t)-1) 817 gnttab_free_grant_references(xnfp->xnf_gref_tx_head); 818 if (xnfp->xnf_gref_rx_head != (grant_ref_t)-1) 819 gnttab_free_grant_references(xnfp->xnf_gref_rx_head); 820 xnf_release_dma_resources(xnfp); 821 cv_destroy(&xnfp->xnf_cv); 822 mutex_destroy(&xnfp->xnf_rx_buf_mutex); 823 mutex_destroy(&xnfp->xnf_txlock); 824 mutex_destroy(&xnfp->xnf_intrlock); 825 826 failure: 827 kmem_free(xnfp, sizeof (*xnfp)); 828 if (macp != NULL) 829 mac_free(macp); 830 831 return (DDI_FAILURE); 832 } 833 834 /* detach(9E) -- Detach a device from the system */ 835 static int 836 xnf_detach(dev_info_t *devinfo, ddi_detach_cmd_t cmd) 837 { 838 xnf_t *xnfp; /* Our private device info */ 839 int i; 840 841 #ifdef XNF_DEBUG 842 if (xnfdebug & XNF_DEBUG_DDI) 843 printf("xnf_detach(0x%p)\n", (void *)devinfo); 844 #endif 845 846 xnfp = ddi_get_driver_private(devinfo); 847 848 switch (cmd) { 849 case DDI_SUSPEND: 850 #ifdef XPV_HVM_DRIVER 851 ec_unbind_evtchn(xnfp->xnf_evtchn); 852 xvdi_free_evtchn(devinfo); 853 #else 854 ddi_remove_intr(devinfo, 0, xnfp->xnf_icookie); 855 #endif 856 857 xvdi_suspend(devinfo); 858 859 mutex_enter(&xnfp->xnf_intrlock); 860 mutex_enter(&xnfp->xnf_txlock); 861 862 xnfp->xnf_evtchn = INVALID_EVTCHN; 863 xnfp->xnf_connected = B_FALSE; 864 mutex_exit(&xnfp->xnf_txlock); 865 mutex_exit(&xnfp->xnf_intrlock); 866 867 /* claim link to be down after disconnect */ 868 mac_link_update(xnfp->xnf_mh, LINK_STATE_DOWN); 869 return (DDI_SUCCESS); 870 871 case DDI_DETACH: 872 break; 873 874 default: 875 return (DDI_FAILURE); 876 } 877 878 if (xnfp->xnf_connected) 879 return (DDI_FAILURE); 880 881 /* Wait for receive buffers to be returned; give up after 5 seconds */ 882 i = 50; 883 884 mutex_enter(&xnfp->xnf_rx_buf_mutex); 885 while (xnfp->xnf_rx_bufs_outstanding > 0) { 886 mutex_exit(&xnfp->xnf_rx_buf_mutex); 887 delay(drv_usectohz(100000)); 888 if (--i == 0) { 889 cmn_err(CE_WARN, 890 "xnf%d: never reclaimed all the " 891 "receive buffers. 
Still have %d " 892 "buffers outstanding.", 893 ddi_get_instance(xnfp->xnf_devinfo), 894 xnfp->xnf_rx_bufs_outstanding); 895 return (DDI_FAILURE); 896 } 897 mutex_enter(&xnfp->xnf_rx_buf_mutex); 898 } 899 mutex_exit(&xnfp->xnf_rx_buf_mutex); 900 901 if (mac_unregister(xnfp->xnf_mh) != 0) 902 return (DDI_FAILURE); 903 904 kstat_delete(xnfp->xnf_kstat_aux); 905 906 /* Stop the receiver */ 907 xnf_stop(xnfp); 908 909 xvdi_remove_event_handler(devinfo, XS_OE_STATE); 910 911 /* Remove the interrupt */ 912 #ifdef XPV_HVM_DRIVER 913 ec_unbind_evtchn(xnfp->xnf_evtchn); 914 xvdi_free_evtchn(devinfo); 915 #else 916 ddi_remove_intr(devinfo, 0, xnfp->xnf_icookie); 917 #endif 918 919 /* Release any pending xmit mblks */ 920 xnf_release_mblks(xnfp); 921 922 /* Release all DMA resources */ 923 xnf_release_dma_resources(xnfp); 924 925 cv_destroy(&xnfp->xnf_cv); 926 mutex_destroy(&xnfp->xnf_rx_buf_mutex); 927 mutex_destroy(&xnfp->xnf_txlock); 928 mutex_destroy(&xnfp->xnf_intrlock); 929 930 kmem_free(xnfp, sizeof (*xnfp)); 931 932 return (DDI_SUCCESS); 933 } 934 935 /* 936 * xnf_set_mac_addr() -- set the physical network address on the board. 937 */ 938 /*ARGSUSED*/ 939 static int 940 xnf_set_mac_addr(void *arg, const uint8_t *macaddr) 941 { 942 xnf_t *xnfp = arg; 943 944 #ifdef XNF_DEBUG 945 if (xnfdebug & XNF_DEBUG_TRACE) 946 printf("xnf%d: set_mac_addr(0x%p): " 947 "%02x:%02x:%02x:%02x:%02x:%02x\n", 948 ddi_get_instance(xnfp->xnf_devinfo), 949 (void *)xnfp, macaddr[0], macaddr[1], macaddr[2], 950 macaddr[3], macaddr[4], macaddr[5]); 951 #endif 952 /* 953 * We can't set our macaddr. 954 * 955 * XXPV dme: Why not? 956 */ 957 return (ENOTSUP); 958 } 959 960 /* 961 * xnf_set_multicast() -- set (enable) or disable a multicast address. 962 * 963 * Program the hardware to enable/disable the multicast address 964 * in "mcast". Enable if "add" is true, disable if false. 965 */ 966 /*ARGSUSED*/ 967 static int 968 xnf_set_multicast(void *arg, boolean_t add, const uint8_t *mca) 969 { 970 xnf_t *xnfp = arg; 971 972 #ifdef XNF_DEBUG 973 if (xnfdebug & XNF_DEBUG_TRACE) 974 printf("xnf%d set_multicast(0x%p): " 975 "%02x:%02x:%02x:%02x:%02x:%02x\n", 976 ddi_get_instance(xnfp->xnf_devinfo), 977 (void *)xnfp, mca[0], mca[1], mca[2], 978 mca[3], mca[4], mca[5]); 979 #endif 980 981 /* 982 * XXPV dme: Ideally we'd relay the address to the backend for 983 * enabling. The protocol doesn't support that (interesting 984 * extension), so we simply succeed and hope that the relevant 985 * packets are going to arrive. 986 * 987 * If protocol support is added for enable/disable then we'll 988 * need to keep a list of those in use and re-add on resume. 989 */ 990 return (0); 991 } 992 993 /* 994 * xnf_set_promiscuous() -- set or reset promiscuous mode on the board 995 * 996 * Program the hardware to enable/disable promiscuous mode. 997 */ 998 /*ARGSUSED*/ 999 static int 1000 xnf_set_promiscuous(void *arg, boolean_t on) 1001 { 1002 xnf_t *xnfp = arg; 1003 1004 #ifdef XNF_DEBUG 1005 if (xnfdebug & XNF_DEBUG_TRACE) 1006 printf("xnf%d set_promiscuous(0x%p, %x)\n", 1007 ddi_get_instance(xnfp->xnf_devinfo), 1008 (void *)xnfp, on); 1009 #endif 1010 /* 1011 * We can't really do this, but we pretend that we can in 1012 * order that snoop will work. 1013 */ 1014 return (0); 1015 } 1016 1017 /* 1018 * Clean buffers that we have responses for from the transmit ring. 
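 * For each acknowledged slot: return its id to the free list, end and
 * release the grant reference, unbind the DMA handle, and free the mblk
 * and any pullup buffer.  Returns the number of free request slots.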
1019 */ 1020 static int 1021 xnf_clean_tx_ring(xnf_t *xnfp) 1022 { 1023 RING_IDX next_resp, i; 1024 struct tx_pktinfo *reap; 1025 int id; 1026 grant_ref_t ref; 1027 boolean_t work_to_do; 1028 1029 ASSERT(MUTEX_HELD(&xnfp->xnf_txlock)); 1030 1031 loop: 1032 while (RING_HAS_UNCONSUMED_RESPONSES(&xnfp->xnf_tx_ring)) { 1033 /* 1034 * index of next transmission ack 1035 */ 1036 next_resp = xnfp->xnf_tx_ring.sring->rsp_prod; 1037 membar_consumer(); 1038 /* 1039 * Clean tx packets from ring that we have responses for 1040 */ 1041 for (i = xnfp->xnf_tx_ring.rsp_cons; i != next_resp; i++) { 1042 id = RING_GET_RESPONSE(&xnfp->xnf_tx_ring, i)->id; 1043 reap = &xnfp->xnf_tx_pkt_info[id]; 1044 ref = reap->grant_ref; 1045 /* 1046 * Return id to free list 1047 */ 1048 reap->id = xnfp->xnf_tx_pkt_id_list; 1049 xnfp->xnf_tx_pkt_id_list = id; 1050 if (gnttab_query_foreign_access(ref) != 0) 1051 panic("tx grant still in use " 1052 "by backend domain"); 1053 (void) ddi_dma_unbind_handle(reap->dma_handle); 1054 (void) gnttab_end_foreign_access_ref(ref, 1055 xnfp->xnf_tx_pages_readonly); 1056 gnttab_release_grant_reference(&xnfp->xnf_gref_tx_head, 1057 ref); 1058 freemsg(reap->mp); 1059 reap->mp = NULL; 1060 reap->grant_ref = GRANT_INVALID_REF; 1061 if (reap->bdesc != NULL) 1062 xnf_free_tx_buffer(reap->bdesc); 1063 reap->bdesc = NULL; 1064 } 1065 xnfp->xnf_tx_ring.rsp_cons = next_resp; 1066 membar_enter(); 1067 } 1068 1069 /* LINTED: constant in conditional context */ 1070 RING_FINAL_CHECK_FOR_RESPONSES(&xnfp->xnf_tx_ring, work_to_do); 1071 if (work_to_do) 1072 goto loop; 1073 1074 return (RING_FREE_REQUESTS(&xnfp->xnf_tx_ring)); 1075 } 1076 1077 /* 1078 * If we need to pull up data from either a packet that crosses a page 1079 * boundary or consisting of multiple mblks, do it here. We allocate 1080 * a page aligned buffer and copy the data into it. The header for the 1081 * allocated buffer is returned. (which is also allocated here) 1082 */ 1083 static struct xnf_buffer_desc * 1084 xnf_pullupmsg(xnf_t *xnfp, mblk_t *mp) 1085 { 1086 struct xnf_buffer_desc *bdesc; 1087 mblk_t *mptr; 1088 caddr_t bp; 1089 int len; 1090 1091 /* 1092 * get a xmit buffer from the xmit buffer pool 1093 */ 1094 mutex_enter(&xnfp->xnf_rx_buf_mutex); 1095 bdesc = xnf_get_tx_buffer(xnfp); 1096 mutex_exit(&xnfp->xnf_rx_buf_mutex); 1097 if (bdesc == NULL) 1098 return (bdesc); 1099 /* 1100 * Copy the data into the buffer 1101 */ 1102 xnfp->xnf_stat_tx_pullup++; 1103 bp = bdesc->buf; 1104 for (mptr = mp; mptr != NULL; mptr = mptr->b_cont) { 1105 len = mptr->b_wptr - mptr->b_rptr; 1106 bcopy(mptr->b_rptr, bp, len); 1107 bp += len; 1108 } 1109 return (bdesc); 1110 } 1111 1112 void 1113 xnf_pseudo_cksum(caddr_t buf, int length) 1114 { 1115 struct ether_header *ehp; 1116 uint16_t sap, len, *stuff; 1117 uint32_t cksum; 1118 size_t offset; 1119 ipha_t *ipha; 1120 ipaddr_t src, dst; 1121 1122 ASSERT(length >= sizeof (*ehp)); 1123 ehp = (struct ether_header *)buf; 1124 1125 if (ntohs(ehp->ether_type) == VLAN_TPID) { 1126 struct ether_vlan_header *evhp; 1127 1128 ASSERT(length >= sizeof (*evhp)); 1129 evhp = (struct ether_vlan_header *)buf; 1130 sap = ntohs(evhp->ether_type); 1131 offset = sizeof (*evhp); 1132 } else { 1133 sap = ntohs(ehp->ether_type); 1134 offset = sizeof (*ehp); 1135 } 1136 1137 ASSERT(sap == ETHERTYPE_IP); 1138 1139 /* Packet should have been pulled up by the caller. 
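	 * The pseudo-header sum folded in below covers ipha_src, ipha_dst,
	 * the protocol component (IP_TCP_CSUM_COMP or IP_UDP_CSUM_COMP)
	 * and the TCP/UDP length; it is folded back into 16 bits and
	 * stored at the checksum offset for the peer to complete.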
*/ 1140 if ((offset + sizeof (ipha_t)) > length) { 1141 cmn_err(CE_WARN, "xnf_pseudo_cksum: no room for checksum"); 1142 return; 1143 } 1144 1145 ipha = (ipha_t *)(buf + offset); 1146 1147 ASSERT(IPH_HDR_LENGTH(ipha) == IP_SIMPLE_HDR_LENGTH); 1148 1149 len = ntohs(ipha->ipha_length) - IP_SIMPLE_HDR_LENGTH; 1150 1151 switch (ipha->ipha_protocol) { 1152 case IPPROTO_TCP: 1153 stuff = IPH_TCPH_CHECKSUMP(ipha, IP_SIMPLE_HDR_LENGTH); 1154 cksum = IP_TCP_CSUM_COMP; 1155 break; 1156 case IPPROTO_UDP: 1157 stuff = IPH_UDPH_CHECKSUMP(ipha, IP_SIMPLE_HDR_LENGTH); 1158 cksum = IP_UDP_CSUM_COMP; 1159 break; 1160 default: 1161 cmn_err(CE_WARN, "xnf_pseudo_cksum: unexpected protocol %d", 1162 ipha->ipha_protocol); 1163 return; 1164 } 1165 1166 src = ipha->ipha_src; 1167 dst = ipha->ipha_dst; 1168 1169 cksum += (dst >> 16) + (dst & 0xFFFF); 1170 cksum += (src >> 16) + (src & 0xFFFF); 1171 cksum += htons(len); 1172 1173 cksum = (cksum >> 16) + (cksum & 0xFFFF); 1174 cksum = (cksum >> 16) + (cksum & 0xFFFF); 1175 1176 ASSERT(cksum <= 0xFFFF); 1177 1178 *stuff = (uint16_t)(cksum ? cksum : ~cksum); 1179 } 1180 1181 /* 1182 * xnf_send_one() -- send a packet 1183 * 1184 * Called when a packet is ready to be transmitted. A pointer to an 1185 * M_DATA message that contains the packet is passed to this routine. 1186 * At least the complete LLC header is contained in the message's 1187 * first message block, and the remainder of the packet is contained 1188 * within additional M_DATA message blocks linked to the first 1189 * message block. 1190 * 1191 */ 1192 static boolean_t 1193 xnf_send_one(xnf_t *xnfp, mblk_t *mp) 1194 { 1195 struct xnf_buffer_desc *xmitbuf; 1196 struct tx_pktinfo *txp_info; 1197 mblk_t *mptr; 1198 ddi_dma_cookie_t dma_cookie; 1199 RING_IDX slot; 1200 int length = 0, i, pktlen = 0, rc, tx_id; 1201 int tx_ring_freespace, page_oops; 1202 uint_t ncookies; 1203 volatile netif_tx_request_t *txrp; 1204 caddr_t bufaddr; 1205 grant_ref_t ref; 1206 unsigned long mfn; 1207 uint32_t pflags; 1208 domid_t oeid; 1209 1210 #ifdef XNF_DEBUG 1211 if (xnfdebug & XNF_DEBUG_SEND) 1212 printf("xnf%d send(0x%p, 0x%p)\n", 1213 ddi_get_instance(xnfp->xnf_devinfo), 1214 (void *)xnfp, (void *)mp); 1215 #endif 1216 1217 ASSERT(mp != NULL); 1218 ASSERT(mp->b_next == NULL); 1219 ASSERT(MUTEX_HELD(&xnfp->xnf_txlock)); 1220 1221 tx_ring_freespace = xnf_clean_tx_ring(xnfp); 1222 ASSERT(tx_ring_freespace >= 0); 1223 1224 oeid = xvdi_get_oeid(xnfp->xnf_devinfo); 1225 xnfp->xnf_stat_tx_attempt++; 1226 /* 1227 * If there are no xmit ring slots available, return. 
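	 * (Returning B_FALSE makes xnf_send() re-queue the mblk and set
	 * xnf_need_sched; xnf_intr() calls mac_tx_update() once
	 * xnf_clean_tx_ring() frees some slots.)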
1228 */ 1229 if (tx_ring_freespace == 0) { 1230 xnfp->xnf_stat_tx_defer++; 1231 return (B_FALSE); /* Send should be retried */ 1232 } 1233 1234 slot = xnfp->xnf_tx_ring.req_prod_pvt; 1235 /* Count the number of mblks in message and compute packet size */ 1236 for (i = 0, mptr = mp; mptr != NULL; mptr = mptr->b_cont, i++) 1237 pktlen += (mptr->b_wptr - mptr->b_rptr); 1238 1239 /* Make sure packet isn't too large */ 1240 if (pktlen > XNF_FRAMESIZE) { 1241 cmn_err(CE_WARN, "xnf%d: oversized packet (%d bytes) dropped", 1242 ddi_get_instance(xnfp->xnf_devinfo), pktlen); 1243 freemsg(mp); 1244 return (B_TRUE); 1245 } 1246 1247 /* 1248 * Test if we cross a page boundary with our buffer 1249 */ 1250 page_oops = (i == 1) && 1251 (xnf_btop((size_t)mp->b_rptr) != 1252 xnf_btop((size_t)(mp->b_rptr + pktlen))); 1253 /* 1254 * XXPV - unfortunately, the Xen virtual net device currently 1255 * doesn't support multiple packet frags, so this will always 1256 * end up doing the pullup if we got more than one packet. 1257 */ 1258 if (i > xnf_max_tx_frags || page_oops) { 1259 if (page_oops) 1260 xnfp->xnf_stat_tx_pagebndry++; 1261 if ((xmitbuf = xnf_pullupmsg(xnfp, mp)) == NULL) { 1262 /* could not allocate resources? */ 1263 #ifdef XNF_DEBUG 1264 cmn_err(CE_WARN, "xnf%d: pullupmsg failed", 1265 ddi_get_instance(xnfp->xnf_devinfo)); 1266 #endif 1267 xnfp->xnf_stat_tx_defer++; 1268 return (B_FALSE); /* Retry send */ 1269 } 1270 bufaddr = xmitbuf->buf; 1271 } else { 1272 xmitbuf = NULL; 1273 bufaddr = (caddr_t)mp->b_rptr; 1274 } 1275 1276 /* set up data descriptor */ 1277 length = pktlen; 1278 1279 /* 1280 * Get packet id from free list 1281 */ 1282 tx_id = xnfp->xnf_tx_pkt_id_list; 1283 ASSERT(tx_id < NET_TX_RING_SIZE); 1284 txp_info = &xnfp->xnf_tx_pkt_info[tx_id]; 1285 xnfp->xnf_tx_pkt_id_list = txp_info->id; 1286 txp_info->id = tx_id; 1287 1288 /* Prepare for DMA mapping of tx buffer(s) */ 1289 rc = ddi_dma_addr_bind_handle(txp_info->dma_handle, 1290 NULL, bufaddr, length, DDI_DMA_WRITE | DDI_DMA_STREAMING, 1291 DDI_DMA_DONTWAIT, 0, &dma_cookie, &ncookies); 1292 if (rc != DDI_DMA_MAPPED) { 1293 ASSERT(rc != DDI_DMA_INUSE); 1294 ASSERT(rc != DDI_DMA_PARTIAL_MAP); 1295 /* 1296 * Return id to free list 1297 */ 1298 txp_info->id = xnfp->xnf_tx_pkt_id_list; 1299 xnfp->xnf_tx_pkt_id_list = tx_id; 1300 if (rc == DDI_DMA_NORESOURCES) { 1301 xnfp->xnf_stat_tx_defer++; 1302 return (B_FALSE); /* Retry later */ 1303 } 1304 #ifdef XNF_DEBUG 1305 cmn_err(CE_WARN, "xnf%d: bind_handle failed (%x)", 1306 ddi_get_instance(xnfp->xnf_devinfo), rc); 1307 #endif 1308 return (B_FALSE); 1309 } 1310 1311 ASSERT(ncookies == 1); 1312 ref = gnttab_claim_grant_reference(&xnfp->xnf_gref_tx_head); 1313 ASSERT((signed short)ref >= 0); 1314 mfn = xnf_btop(pa_to_ma((paddr_t)dma_cookie.dmac_laddress)); 1315 gnttab_grant_foreign_access_ref(ref, oeid, mfn, 1316 xnfp->xnf_tx_pages_readonly); 1317 txp_info->grant_ref = ref; 1318 txrp = RING_GET_REQUEST(&xnfp->xnf_tx_ring, slot); 1319 txrp->gref = ref; 1320 txrp->size = dma_cookie.dmac_size; 1321 txrp->offset = (uintptr_t)bufaddr & PAGEOFFSET; 1322 txrp->id = tx_id; 1323 txrp->flags = 0; 1324 hcksum_retrieve(mp, NULL, NULL, NULL, NULL, NULL, NULL, &pflags); 1325 if (pflags != 0) { 1326 ASSERT(xnfp->xnf_cksum_offload); 1327 /* 1328 * If the local protocol stack requests checksum 1329 * offload we set the 'checksum blank' flag, 1330 * indicating to the peer that we need the checksum 1331 * calculated for us. 
1332 * 1333 * We _don't_ set the validated flag, because we haven't 1334 * validated that the data and the checksum match. 1335 */ 1336 xnf_pseudo_cksum(bufaddr, length); 1337 txrp->flags |= NETTXF_csum_blank; 1338 xnfp->xnf_stat_tx_cksum_deferred++; 1339 } 1340 membar_producer(); 1341 xnfp->xnf_tx_ring.req_prod_pvt = slot + 1; 1342 1343 txp_info->mp = mp; 1344 txp_info->bdesc = xmitbuf; 1345 1346 xnfp->xnf_stat_opackets++; 1347 xnfp->xnf_stat_obytes += pktlen; 1348 1349 return (B_TRUE); /* successful transmit attempt */ 1350 } 1351 1352 mblk_t * 1353 xnf_send(void *arg, mblk_t *mp) 1354 { 1355 xnf_t *xnfp = arg; 1356 mblk_t *next; 1357 boolean_t sent_something = B_FALSE; 1358 1359 mutex_enter(&xnfp->xnf_txlock); 1360 1361 /* 1362 * Transmission attempts should be impossible without having 1363 * previously called xnf_start(). 1364 */ 1365 ASSERT(xnfp->xnf_running); 1366 1367 /* 1368 * Wait for getting connected to the backend 1369 */ 1370 while (!xnfp->xnf_connected) { 1371 cv_wait(&xnfp->xnf_cv, &xnfp->xnf_txlock); 1372 } 1373 1374 while (mp != NULL) { 1375 next = mp->b_next; 1376 mp->b_next = NULL; 1377 1378 if (!xnf_send_one(xnfp, mp)) { 1379 mp->b_next = next; 1380 break; 1381 } 1382 1383 mp = next; 1384 sent_something = B_TRUE; 1385 } 1386 1387 if (sent_something) { 1388 boolean_t notify; 1389 1390 /* LINTED: constant in conditional context */ 1391 RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&xnfp->xnf_tx_ring, 1392 notify); 1393 if (notify) 1394 ec_notify_via_evtchn(xnfp->xnf_evtchn); 1395 } 1396 1397 if (mp != NULL) 1398 xnfp->xnf_need_sched = B_TRUE; 1399 1400 mutex_exit(&xnfp->xnf_txlock); 1401 1402 return (mp); 1403 } 1404 1405 /* 1406 * xnf_intr() -- ring interrupt service routine 1407 */ 1408 static uint_t 1409 xnf_intr(caddr_t arg) 1410 { 1411 xnf_t *xnfp = (xnf_t *)arg; 1412 boolean_t sched = B_FALSE; 1413 1414 mutex_enter(&xnfp->xnf_intrlock); 1415 1416 /* spurious intr */ 1417 if (!xnfp->xnf_connected) { 1418 mutex_exit(&xnfp->xnf_intrlock); 1419 xnfp->xnf_stat_unclaimed_interrupts++; 1420 return (DDI_INTR_UNCLAIMED); 1421 } 1422 1423 #ifdef XNF_DEBUG 1424 if (xnfdebug & XNF_DEBUG_INT) 1425 printf("xnf%d intr(0x%p)\n", 1426 ddi_get_instance(xnfp->xnf_devinfo), (void *)xnfp); 1427 #endif 1428 if (RING_HAS_UNCONSUMED_RESPONSES(&xnfp->xnf_rx_ring)) { 1429 mblk_t *mp; 1430 1431 if (xnfp->xnf_rx_hvcopy) 1432 mp = xnf_process_hvcopy_recv(xnfp); 1433 else 1434 mp = xnf_process_recv(xnfp); 1435 1436 if (mp != NULL) 1437 mac_rx(xnfp->xnf_mh, NULL, mp); 1438 } 1439 1440 xnfp->xnf_stat_interrupts++; 1441 mutex_exit(&xnfp->xnf_intrlock); 1442 1443 /* 1444 * Clean tx ring and try to start any blocked xmit streams if 1445 * there is now some space. 1446 */ 1447 mutex_enter(&xnfp->xnf_txlock); 1448 if (xnf_clean_tx_ring(xnfp) > 0) { 1449 sched = xnfp->xnf_need_sched; 1450 xnfp->xnf_need_sched = B_FALSE; 1451 } 1452 mutex_exit(&xnfp->xnf_txlock); 1453 1454 if (sched) 1455 mac_tx_update(xnfp->xnf_mh); 1456 1457 return (DDI_INTR_CLAIMED); 1458 } 1459 1460 /* 1461 * xnf_start() -- start the board receiving and enable interrupts. 1462 */ 1463 static int 1464 xnf_start(void *arg) 1465 { 1466 xnf_t *xnfp = arg; 1467 1468 #ifdef XNF_DEBUG 1469 if (xnfdebug & XNF_DEBUG_TRACE) 1470 printf("xnf%d start(0x%p)\n", 1471 ddi_get_instance(xnfp->xnf_devinfo), (void *)xnfp); 1472 #endif 1473 1474 mutex_enter(&xnfp->xnf_intrlock); 1475 mutex_enter(&xnfp->xnf_txlock); 1476 1477 /* Accept packets from above. 
*/ 1478 xnfp->xnf_running = B_TRUE; 1479 1480 mutex_exit(&xnfp->xnf_txlock); 1481 mutex_exit(&xnfp->xnf_intrlock); 1482 1483 return (0); 1484 } 1485 1486 /* xnf_stop() - disable hardware */ 1487 static void 1488 xnf_stop(void *arg) 1489 { 1490 xnf_t *xnfp = arg; 1491 1492 #ifdef XNF_DEBUG 1493 if (xnfdebug & XNF_DEBUG_TRACE) 1494 printf("xnf%d stop(0x%p)\n", 1495 ddi_get_instance(xnfp->xnf_devinfo), (void *)xnfp); 1496 #endif 1497 1498 mutex_enter(&xnfp->xnf_intrlock); 1499 mutex_enter(&xnfp->xnf_txlock); 1500 1501 xnfp->xnf_running = B_FALSE; 1502 1503 mutex_exit(&xnfp->xnf_txlock); 1504 mutex_exit(&xnfp->xnf_intrlock); 1505 } 1506 1507 /* 1508 * Driver private functions follow 1509 */ 1510 1511 /* 1512 * Hang buffer on rx ring 1513 */ 1514 static void 1515 rx_buffer_hang(xnf_t *xnfp, struct xnf_buffer_desc *bdesc) 1516 { 1517 volatile netif_rx_request_t *reqp; 1518 RING_IDX hang_ix; 1519 grant_ref_t ref; 1520 domid_t oeid; 1521 1522 oeid = xvdi_get_oeid(xnfp->xnf_devinfo); 1523 1524 ASSERT(MUTEX_HELD(&xnfp->xnf_intrlock)); 1525 reqp = RING_GET_REQUEST(&xnfp->xnf_rx_ring, 1526 xnfp->xnf_rx_ring.req_prod_pvt); 1527 hang_ix = (RING_IDX) (reqp - RING_GET_REQUEST(&xnfp->xnf_rx_ring, 0)); 1528 ASSERT(xnfp->xnf_rxpkt_bufptr[hang_ix] == NULL); 1529 if (bdesc->grant_ref == GRANT_INVALID_REF) { 1530 ref = gnttab_claim_grant_reference(&xnfp->xnf_gref_rx_head); 1531 ASSERT((signed short)ref >= 0); 1532 bdesc->grant_ref = ref; 1533 if (xnfp->xnf_rx_hvcopy) { 1534 pfn_t pfn = xnf_btop(bdesc->buf_phys); 1535 mfn_t mfn = pfn_to_mfn(pfn); 1536 1537 gnttab_grant_foreign_access_ref(ref, oeid, mfn, 0); 1538 } else { 1539 gnttab_grant_foreign_transfer_ref(ref, oeid, 0); 1540 } 1541 } 1542 reqp->id = hang_ix; 1543 reqp->gref = bdesc->grant_ref; 1544 bdesc->id = hang_ix; 1545 xnfp->xnf_rxpkt_bufptr[hang_ix] = bdesc; 1546 membar_producer(); 1547 xnfp->xnf_rx_ring.req_prod_pvt++; 1548 } 1549 1550 static mblk_t * 1551 xnf_process_hvcopy_recv(xnf_t *xnfp) 1552 { 1553 netif_rx_response_t *rxpkt; 1554 mblk_t *mp, *head, *tail; 1555 struct xnf_buffer_desc *bdesc; 1556 boolean_t hwcsum = B_FALSE, notify, work_to_do; 1557 size_t len; 1558 1559 /* 1560 * in loop over unconsumed responses, we do: 1561 * 1. get a response 1562 * 2. take corresponding buffer off recv. ring 1563 * 3. indicate this by setting slot to NULL 1564 * 4. create a new message and 1565 * 5. copy data in, adjust ptr 1566 * 1567 * outside loop: 1568 * 7. make sure no more data has arrived; kick HV 1569 */ 1570 1571 head = tail = NULL; 1572 1573 loop: 1574 while (RING_HAS_UNCONSUMED_RESPONSES(&xnfp->xnf_rx_ring)) { 1575 1576 /* 1. */ 1577 rxpkt = RING_GET_RESPONSE(&xnfp->xnf_rx_ring, 1578 xnfp->xnf_rx_ring.rsp_cons); 1579 1580 DTRACE_PROBE4(got_PKT, int, (int)rxpkt->id, int, 1581 (int)rxpkt->offset, 1582 int, (int)rxpkt->flags, int, (int)rxpkt->status); 1583 1584 /* 1585 * 2. 
1586 * Take buffer off of receive ring 1587 */ 1588 hwcsum = B_FALSE; 1589 bdesc = xnfp->xnf_rxpkt_bufptr[rxpkt->id]; 1590 /* 3 */ 1591 xnfp->xnf_rxpkt_bufptr[rxpkt->id] = NULL; 1592 ASSERT(bdesc->id == rxpkt->id); 1593 mp = NULL; 1594 if (!xnfp->xnf_running) { 1595 DTRACE_PROBE4(pkt_dropped, int, rxpkt->status, 1596 char *, bdesc->buf, int, rxpkt->offset, 1597 char *, ((char *)bdesc->buf) + rxpkt->offset); 1598 xnfp->xnf_stat_drop++; 1599 /* 1600 * re-hang the buffer 1601 */ 1602 rx_buffer_hang(xnfp, bdesc); 1603 } else if (rxpkt->status <= 0) { 1604 DTRACE_PROBE4(pkt_status_negative, int, rxpkt->status, 1605 char *, bdesc->buf, int, rxpkt->offset, 1606 char *, ((char *)bdesc->buf) + rxpkt->offset); 1607 xnfp->xnf_stat_errrx++; 1608 if (rxpkt->status == 0) 1609 xnfp->xnf_stat_runt++; 1610 if (rxpkt->status == NETIF_RSP_ERROR) 1611 xnfp->xnf_stat_mac_rcv_error++; 1612 if (rxpkt->status == NETIF_RSP_DROPPED) 1613 xnfp->xnf_stat_norxbuf++; 1614 /* 1615 * re-hang the buffer 1616 */ 1617 rx_buffer_hang(xnfp, bdesc); 1618 } else { 1619 grant_ref_t ref = bdesc->grant_ref; 1620 struct xnf_buffer_desc *new_bdesc; 1621 unsigned long off = rxpkt->offset; 1622 1623 DTRACE_PROBE4(pkt_status_ok, int, rxpkt->status, 1624 char *, bdesc->buf, int, rxpkt->offset, 1625 char *, ((char *)bdesc->buf) + rxpkt->offset); 1626 len = rxpkt->status; 1627 ASSERT(off + len <= PAGEOFFSET); 1628 if (ref == GRANT_INVALID_REF) { 1629 mp = NULL; 1630 new_bdesc = bdesc; 1631 cmn_err(CE_WARN, "Bad rx grant reference %d " 1632 "from dom %d", ref, 1633 xvdi_get_oeid(xnfp->xnf_devinfo)); 1634 goto luckless; 1635 } 1636 /* 1637 * Release ref which we'll be re-claiming in 1638 * rx_buffer_hang(). 1639 */ 1640 bdesc->grant_ref = GRANT_INVALID_REF; 1641 (void) gnttab_end_foreign_access_ref(ref, 0); 1642 gnttab_release_grant_reference(&xnfp->xnf_gref_rx_head, 1643 ref); 1644 if (rxpkt->flags & NETRXF_data_validated) 1645 hwcsum = B_TRUE; 1646 1647 /* 1648 * XXPV for the initial implementation of HVcopy, 1649 * create a new msg and copy in the data 1650 */ 1651 /* 4. */ 1652 if ((mp = allocb(len, BPRI_MED)) == NULL) { 1653 /* 1654 * Couldn't get buffer to copy to, 1655 * drop this data, and re-hang 1656 * the buffer on the ring. 1657 */ 1658 xnfp->xnf_stat_norxbuf++; 1659 DTRACE_PROBE(alloc_nix); 1660 } else { 1661 /* 5. */ 1662 DTRACE_PROBE(alloc_ok); 1663 bcopy(bdesc->buf + off, mp->b_wptr, 1664 len); 1665 mp->b_wptr += len; 1666 } 1667 new_bdesc = bdesc; 1668 luckless: 1669 1670 /* Re-hang old or hang new buffer. */ 1671 rx_buffer_hang(xnfp, new_bdesc); 1672 } 1673 if (mp) { 1674 if (hwcsum) { 1675 /* 1676 * See comments in xnf_process_recv(). 1677 */ 1678 1679 (void) hcksum_assoc(mp, NULL, 1680 NULL, 0, 0, 0, 0, 1681 HCK_FULLCKSUM | 1682 HCK_FULLCKSUM_OK, 1683 0); 1684 xnfp->xnf_stat_rx_cksum_no_need++; 1685 } 1686 if (head == NULL) { 1687 head = tail = mp; 1688 } else { 1689 tail->b_next = mp; 1690 tail = mp; 1691 } 1692 1693 ASSERT(mp->b_next == NULL); 1694 1695 xnfp->xnf_stat_ipackets++; 1696 xnfp->xnf_stat_rbytes += len; 1697 } 1698 1699 xnfp->xnf_rx_ring.rsp_cons++; 1700 1701 xnfp->xnf_stat_hvcopy_packet_processed++; 1702 } 1703 1704 /* 7. */ 1705 /* 1706 * Has more data come in since we started? 1707 */ 1708 /* LINTED: constant in conditional context */ 1709 RING_FINAL_CHECK_FOR_RESPONSES(&xnfp->xnf_rx_ring, work_to_do); 1710 if (work_to_do) 1711 goto loop; 1712 1713 /* 1714 * Indicate to the backend that we have re-filled the receive 1715 * ring. 
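	 * (RING_PUSH_REQUESTS_AND_CHECK_NOTIFY sets 'notify' only when
	 * the backend has asked, via its req_event index, to be told
	 * about new requests, avoiding needless event channel kicks.)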
1716 */ 1717 /* LINTED: constant in conditional context */ 1718 RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&xnfp->xnf_rx_ring, notify); 1719 if (notify) 1720 ec_notify_via_evtchn(xnfp->xnf_evtchn); 1721 1722 return (head); 1723 } 1724 1725 /* Process all queued received packets */ 1726 static mblk_t * 1727 xnf_process_recv(xnf_t *xnfp) 1728 { 1729 volatile netif_rx_response_t *rxpkt; 1730 mblk_t *mp, *head, *tail; 1731 struct xnf_buffer_desc *bdesc; 1732 extern mblk_t *desballoc(unsigned char *, size_t, uint_t, frtn_t *); 1733 boolean_t hwcsum = B_FALSE, notify, work_to_do; 1734 size_t len; 1735 pfn_t pfn; 1736 long cnt; 1737 1738 head = tail = NULL; 1739 loop: 1740 while (RING_HAS_UNCONSUMED_RESPONSES(&xnfp->xnf_rx_ring)) { 1741 1742 rxpkt = RING_GET_RESPONSE(&xnfp->xnf_rx_ring, 1743 xnfp->xnf_rx_ring.rsp_cons); 1744 1745 /* 1746 * Take buffer off of receive ring 1747 */ 1748 hwcsum = B_FALSE; 1749 bdesc = xnfp->xnf_rxpkt_bufptr[rxpkt->id]; 1750 xnfp->xnf_rxpkt_bufptr[rxpkt->id] = NULL; 1751 ASSERT(bdesc->id == rxpkt->id); 1752 mp = NULL; 1753 if (!xnfp->xnf_running) { 1754 xnfp->xnf_stat_drop++; 1755 /* 1756 * re-hang the buffer 1757 */ 1758 rx_buffer_hang(xnfp, bdesc); 1759 } else if (rxpkt->status <= 0) { 1760 xnfp->xnf_stat_errrx++; 1761 if (rxpkt->status == 0) 1762 xnfp->xnf_stat_runt++; 1763 if (rxpkt->status == NETIF_RSP_ERROR) 1764 xnfp->xnf_stat_mac_rcv_error++; 1765 if (rxpkt->status == NETIF_RSP_DROPPED) 1766 xnfp->xnf_stat_norxbuf++; 1767 /* 1768 * re-hang the buffer 1769 */ 1770 rx_buffer_hang(xnfp, bdesc); 1771 } else { 1772 grant_ref_t ref = bdesc->grant_ref; 1773 struct xnf_buffer_desc *new_bdesc; 1774 unsigned long off = rxpkt->offset; 1775 unsigned long mfn; 1776 1777 len = rxpkt->status; 1778 ASSERT(off + len <= PAGEOFFSET); 1779 if (ref == GRANT_INVALID_REF) { 1780 mp = NULL; 1781 new_bdesc = bdesc; 1782 cmn_err(CE_WARN, "Bad rx grant reference %d " 1783 "from dom %d", ref, 1784 xvdi_get_oeid(xnfp->xnf_devinfo)); 1785 goto luckless; 1786 } 1787 bdesc->grant_ref = GRANT_INVALID_REF; 1788 mfn = gnttab_end_foreign_transfer_ref(ref); 1789 ASSERT(mfn != MFN_INVALID); 1790 ASSERT(hat_getpfnum(kas.a_hat, bdesc->buf) == 1791 PFN_INVALID); 1792 1793 gnttab_release_grant_reference(&xnfp->xnf_gref_rx_head, 1794 ref); 1795 reassign_pfn(xnf_btop(bdesc->buf_phys), mfn); 1796 hat_devload(kas.a_hat, bdesc->buf, PAGESIZE, 1797 xnf_btop(bdesc->buf_phys), 1798 PROT_READ | PROT_WRITE, HAT_LOAD); 1799 balloon_drv_added(1); 1800 1801 if (rxpkt->flags & NETRXF_data_validated) 1802 hwcsum = B_TRUE; 1803 if (len <= xnf_rx_bcopy_thresh) { 1804 /* 1805 * For small buffers, just copy the data 1806 * and send the copy upstream. 1807 */ 1808 new_bdesc = NULL; 1809 } else { 1810 /* 1811 * We send a pointer to this data upstream; 1812 * we need a new buffer to replace this one. 1813 */ 1814 mutex_enter(&xnfp->xnf_rx_buf_mutex); 1815 new_bdesc = xnf_get_buffer(xnfp); 1816 if (new_bdesc != NULL) { 1817 xnfp->xnf_rx_bufs_outstanding++; 1818 } else { 1819 xnfp->xnf_stat_rx_no_ringbuf++; 1820 } 1821 mutex_exit(&xnfp->xnf_rx_buf_mutex); 1822 } 1823 1824 if (new_bdesc == NULL) { 1825 /* 1826 * Don't have a new ring buffer; bcopy the data 1827 * from the buffer, and preserve the 1828 * original buffer 1829 */ 1830 if ((mp = allocb(len, BPRI_MED)) == NULL) { 1831 /* 1832 * Could't get buffer to copy to, 1833 * drop this data, and re-hang 1834 * the buffer on the ring. 
1835 */ 1836 xnfp->xnf_stat_norxbuf++; 1837 } else { 1838 bcopy(bdesc->buf + off, mp->b_wptr, 1839 len); 1840 } 1841 /* 1842 * Give the buffer page back to xen 1843 */ 1844 pfn = xnf_btop(bdesc->buf_phys); 1845 cnt = balloon_free_pages(1, &mfn, bdesc->buf, 1846 &pfn); 1847 if (cnt != 1) { 1848 cmn_err(CE_WARN, "unable to give a " 1849 "page back to the hypervisor\n"); 1850 } 1851 new_bdesc = bdesc; 1852 } else { 1853 if ((mp = desballoc((unsigned char *)bdesc->buf, 1854 off + len, 0, (frtn_t *)bdesc)) == NULL) { 1855 /* 1856 * Couldn't get mblk to pass recv data 1857 * up with, free the old ring buffer 1858 */ 1859 xnfp->xnf_stat_norxbuf++; 1860 xnf_rcv_complete(bdesc); 1861 goto luckless; 1862 } 1863 (void) ddi_dma_sync(bdesc->dma_handle, 1864 0, 0, DDI_DMA_SYNC_FORCPU); 1865 1866 mp->b_wptr += off; 1867 mp->b_rptr += off; 1868 } 1869 luckless: 1870 if (mp) 1871 mp->b_wptr += len; 1872 /* re-hang old or hang new buffer */ 1873 rx_buffer_hang(xnfp, new_bdesc); 1874 } 1875 if (mp) { 1876 if (hwcsum) { 1877 /* 1878 * If the peer says that the data has 1879 * been validated then we declare that 1880 * the full checksum has been 1881 * verified. 1882 * 1883 * We don't look at the "checksum 1884 * blank" flag, and hence could have a 1885 * packet here that we are asserting 1886 * is good with a blank checksum. 1887 * 1888 * The hardware checksum offload 1889 * specification says that we must 1890 * provide the actual checksum as well 1891 * as an assertion that it is valid, 1892 * but the protocol stack doesn't 1893 * actually use it and some other 1894 * drivers don't bother, so we don't. 1895 * If it was necessary we could grovel 1896 * in the packet to find it. 1897 */ 1898 1899 (void) hcksum_assoc(mp, NULL, 1900 NULL, 0, 0, 0, 0, 1901 HCK_FULLCKSUM | 1902 HCK_FULLCKSUM_OK, 1903 0); 1904 xnfp->xnf_stat_rx_cksum_no_need++; 1905 } 1906 if (head == NULL) { 1907 head = tail = mp; 1908 } else { 1909 tail->b_next = mp; 1910 tail = mp; 1911 } 1912 1913 ASSERT(mp->b_next == NULL); 1914 1915 xnfp->xnf_stat_ipackets++; 1916 xnfp->xnf_stat_rbytes += len; 1917 } 1918 1919 xnfp->xnf_rx_ring.rsp_cons++; 1920 } 1921 1922 /* 1923 * Has more data come in since we started? 1924 */ 1925 /* LINTED: constant in conditional context */ 1926 RING_FINAL_CHECK_FOR_RESPONSES(&xnfp->xnf_rx_ring, work_to_do); 1927 if (work_to_do) 1928 goto loop; 1929 1930 /* 1931 * Indicate to the backend that we have re-filled the receive 1932 * ring. 1933 */ 1934 /* LINTED: constant in conditional context */ 1935 RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&xnfp->xnf_rx_ring, notify); 1936 if (notify) 1937 ec_notify_via_evtchn(xnfp->xnf_evtchn); 1938 1939 return (head); 1940 } 1941 1942 /* Called when the upper layers free a message we passed upstream */ 1943 static void 1944 xnf_rcv_complete(struct xnf_buffer_desc *bdesc) 1945 { 1946 xnf_t *xnfp = bdesc->xnfp; 1947 pfn_t pfn; 1948 long cnt; 1949 1950 /* One less outstanding receive buffer */ 1951 mutex_enter(&xnfp->xnf_rx_buf_mutex); 1952 --xnfp->xnf_rx_bufs_outstanding; 1953 /* 1954 * Return buffer to the free list, unless the free list is getting 1955 * too large. XXPV - this threshold may need tuning. 1956 */ 1957 if (xnfp->xnf_rx_descs_free < xnf_rx_bufs_lowat) { 1958 /* 1959 * Unmap the page, and hand the machine page back 1960 * to xen so it can be re-used as a backend net buffer. 
1961 */ 1962 pfn = xnf_btop(bdesc->buf_phys); 1963 cnt = balloon_free_pages(1, NULL, bdesc->buf, &pfn); 1964 if (cnt != 1) { 1965 cmn_err(CE_WARN, "unable to give a page back to the " 1966 "hypervisor\n"); 1967 } 1968 1969 bdesc->next = xnfp->xnf_free_list; 1970 xnfp->xnf_free_list = bdesc; 1971 xnfp->xnf_rx_descs_free++; 1972 mutex_exit(&xnfp->xnf_rx_buf_mutex); 1973 } else { 1974 /* 1975 * We can return everything here since we have a free buffer 1976 * that we have not given the backing page for back to xen. 1977 */ 1978 --xnfp->xnf_rx_buffer_count; 1979 mutex_exit(&xnfp->xnf_rx_buf_mutex); 1980 (void) ddi_dma_unbind_handle(bdesc->dma_handle); 1981 ddi_dma_mem_free(&bdesc->acc_handle); 1982 ddi_dma_free_handle(&bdesc->dma_handle); 1983 kmem_free(bdesc, sizeof (*bdesc)); 1984 } 1985 } 1986 1987 /* 1988 * xnf_alloc_dma_resources() -- initialize the drivers structures 1989 */ 1990 static int 1991 xnf_alloc_dma_resources(xnf_t *xnfp) 1992 { 1993 dev_info_t *devinfo = xnfp->xnf_devinfo; 1994 int i; 1995 size_t len; 1996 ddi_dma_cookie_t dma_cookie; 1997 uint_t ncookies; 1998 struct xnf_buffer_desc *bdesc; 1999 int rc; 2000 caddr_t rptr; 2001 2002 xnfp->xnf_n_rx = NET_RX_RING_SIZE; 2003 xnfp->xnf_max_rx_bufs = xnf_rx_bufs_hiwat; 2004 2005 xnfp->xnf_n_tx = NET_TX_RING_SIZE; 2006 2007 /* 2008 * The code below allocates all the DMA data structures that 2009 * need to be released when the driver is detached. 2010 * 2011 * First allocate handles for mapping (virtual address) pointers to 2012 * transmit data buffers to physical addresses 2013 */ 2014 for (i = 0; i < xnfp->xnf_n_tx; i++) { 2015 if ((rc = ddi_dma_alloc_handle(devinfo, 2016 &tx_buffer_dma_attr, DDI_DMA_SLEEP, 0, 2017 &xnfp->xnf_tx_pkt_info[i].dma_handle)) != DDI_SUCCESS) 2018 return (DDI_FAILURE); 2019 } 2020 2021 /* 2022 * Allocate page for the transmit descriptor ring. 2023 */ 2024 if (ddi_dma_alloc_handle(devinfo, &ringbuf_dma_attr, 2025 DDI_DMA_SLEEP, 0, &xnfp->xnf_tx_ring_dma_handle) != DDI_SUCCESS) 2026 goto alloc_error; 2027 2028 if (ddi_dma_mem_alloc(xnfp->xnf_tx_ring_dma_handle, 2029 PAGESIZE, &accattr, DDI_DMA_CONSISTENT, 2030 DDI_DMA_SLEEP, 0, &rptr, &len, 2031 &xnfp->xnf_tx_ring_dma_acchandle) != DDI_SUCCESS) { 2032 ddi_dma_free_handle(&xnfp->xnf_tx_ring_dma_handle); 2033 xnfp->xnf_tx_ring_dma_handle = NULL; 2034 goto alloc_error; 2035 } 2036 2037 if ((rc = ddi_dma_addr_bind_handle(xnfp->xnf_tx_ring_dma_handle, NULL, 2038 rptr, PAGESIZE, DDI_DMA_RDWR | DDI_DMA_CONSISTENT, 2039 DDI_DMA_SLEEP, 0, &dma_cookie, &ncookies)) != DDI_DMA_MAPPED) { 2040 ddi_dma_mem_free(&xnfp->xnf_tx_ring_dma_acchandle); 2041 ddi_dma_free_handle(&xnfp->xnf_tx_ring_dma_handle); 2042 xnfp->xnf_tx_ring_dma_handle = NULL; 2043 xnfp->xnf_tx_ring_dma_acchandle = NULL; 2044 if (rc == DDI_DMA_NORESOURCES) 2045 goto alloc_error; 2046 else 2047 goto error; 2048 } 2049 2050 ASSERT(ncookies == 1); 2051 bzero(rptr, PAGESIZE); 2052 /* LINTED: constant in conditional context */ 2053 SHARED_RING_INIT((netif_tx_sring_t *)rptr); 2054 /* LINTED: constant in conditional context */ 2055 FRONT_RING_INIT(&xnfp->xnf_tx_ring, (netif_tx_sring_t *)rptr, PAGESIZE); 2056 xnfp->xnf_tx_ring_phys_addr = dma_cookie.dmac_laddress; 2057 2058 /* 2059 * Allocate page for the receive descriptor ring. 
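	 * (Mirrors the transmit ring setup above: allocate a DMA handle
	 * and a page of memory, bind it, then SHARED_RING_INIT and
	 * FRONT_RING_INIT the shared/front rings.)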
2060 */ 2061 if (ddi_dma_alloc_handle(devinfo, &ringbuf_dma_attr, 2062 DDI_DMA_SLEEP, 0, &xnfp->xnf_rx_ring_dma_handle) != DDI_SUCCESS) 2063 goto alloc_error; 2064 2065 if (ddi_dma_mem_alloc(xnfp->xnf_rx_ring_dma_handle, 2066 PAGESIZE, &accattr, DDI_DMA_CONSISTENT, 2067 DDI_DMA_SLEEP, 0, &rptr, &len, 2068 &xnfp->xnf_rx_ring_dma_acchandle) != DDI_SUCCESS) { 2069 ddi_dma_free_handle(&xnfp->xnf_rx_ring_dma_handle); 2070 xnfp->xnf_rx_ring_dma_handle = NULL; 2071 goto alloc_error; 2072 } 2073 2074 if ((rc = ddi_dma_addr_bind_handle(xnfp->xnf_rx_ring_dma_handle, NULL, 2075 rptr, PAGESIZE, DDI_DMA_RDWR | DDI_DMA_CONSISTENT, 2076 DDI_DMA_SLEEP, 0, &dma_cookie, &ncookies)) != DDI_DMA_MAPPED) { 2077 ddi_dma_mem_free(&xnfp->xnf_rx_ring_dma_acchandle); 2078 ddi_dma_free_handle(&xnfp->xnf_rx_ring_dma_handle); 2079 xnfp->xnf_rx_ring_dma_handle = NULL; 2080 xnfp->xnf_rx_ring_dma_acchandle = NULL; 2081 if (rc == DDI_DMA_NORESOURCES) 2082 goto alloc_error; 2083 else 2084 goto error; 2085 } 2086 2087 ASSERT(ncookies == 1); 2088 bzero(rptr, PAGESIZE); 2089 /* LINTED: constant in conditional context */ 2090 SHARED_RING_INIT((netif_rx_sring_t *)rptr); 2091 /* LINTED: constant in conditional context */ 2092 FRONT_RING_INIT(&xnfp->xnf_rx_ring, (netif_rx_sring_t *)rptr, PAGESIZE); 2093 xnfp->xnf_rx_ring_phys_addr = dma_cookie.dmac_laddress; 2094 2095 /* 2096 * Preallocate receive buffers for each receive descriptor. 2097 */ 2098 2099 /* Set up the "free list" of receive buffer descriptors */ 2100 for (i = 0; i < xnfp->xnf_n_rx; i++) { 2101 if ((bdesc = xnf_alloc_buffer(xnfp)) == NULL) 2102 goto alloc_error; 2103 bdesc->next = xnfp->xnf_free_list; 2104 xnfp->xnf_free_list = bdesc; 2105 } 2106 2107 return (DDI_SUCCESS); 2108 2109 alloc_error: 2110 cmn_err(CE_WARN, "xnf%d: could not allocate enough DMA memory", 2111 ddi_get_instance(xnfp->xnf_devinfo)); 2112 error: 2113 xnf_release_dma_resources(xnfp); 2114 return (DDI_FAILURE); 2115 } 2116 2117 /* 2118 * Release all DMA resources in the opposite order from acquisition 2119 * Should not be called until all outstanding esballoc buffers 2120 * have been returned. 
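 *
 * The desballoc'd mblks passed upstream carry a free routine
 * (xnf_rcv_complete() or xnf_copy_rcv_complete()) whose argument
 * points back into these buffer descriptors, so tearing the
 * descriptors down while such mblks are still outstanding would
 * leave those free routines with dangling pointers.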
2121 */ 2122 static void 2123 xnf_release_dma_resources(xnf_t *xnfp) 2124 { 2125 int i; 2126 2127 /* 2128 * Free receive buffers which are currently associated with 2129 * descriptors 2130 */ 2131 for (i = 0; i < xnfp->xnf_n_rx; i++) { 2132 struct xnf_buffer_desc *bp; 2133 2134 if ((bp = xnfp->xnf_rxpkt_bufptr[i]) == NULL) 2135 continue; 2136 xnf_free_buffer(bp); 2137 xnfp->xnf_rxpkt_bufptr[i] = NULL; 2138 } 2139 2140 /* Free the receive ring buffer */ 2141 if (xnfp->xnf_rx_ring_dma_acchandle != NULL) { 2142 (void) ddi_dma_unbind_handle(xnfp->xnf_rx_ring_dma_handle); 2143 ddi_dma_mem_free(&xnfp->xnf_rx_ring_dma_acchandle); 2144 ddi_dma_free_handle(&xnfp->xnf_rx_ring_dma_handle); 2145 xnfp->xnf_rx_ring_dma_acchandle = NULL; 2146 } 2147 /* Free the transmit ring buffer */ 2148 if (xnfp->xnf_tx_ring_dma_acchandle != NULL) { 2149 (void) ddi_dma_unbind_handle(xnfp->xnf_tx_ring_dma_handle); 2150 ddi_dma_mem_free(&xnfp->xnf_tx_ring_dma_acchandle); 2151 ddi_dma_free_handle(&xnfp->xnf_tx_ring_dma_handle); 2152 xnfp->xnf_tx_ring_dma_acchandle = NULL; 2153 } 2154 2155 /* 2156 * Free handles for mapping (virtual address) pointers to 2157 * transmit data buffers to physical addresses 2158 */ 2159 for (i = 0; i < xnfp->xnf_n_tx; i++) { 2160 if (xnfp->xnf_tx_pkt_info[i].dma_handle != NULL) { 2161 ddi_dma_free_handle( 2162 &xnfp->xnf_tx_pkt_info[i].dma_handle); 2163 } 2164 } 2165 2166 } 2167 2168 static void 2169 xnf_release_mblks(xnf_t *xnfp) 2170 { 2171 int i; 2172 2173 for (i = 0; i < xnfp->xnf_n_tx; i++) { 2174 if (xnfp->xnf_tx_pkt_info[i].mp == NULL) 2175 continue; 2176 freemsg(xnfp->xnf_tx_pkt_info[i].mp); 2177 xnfp->xnf_tx_pkt_info[i].mp = NULL; 2178 (void) ddi_dma_unbind_handle( 2179 xnfp->xnf_tx_pkt_info[i].dma_handle); 2180 } 2181 } 2182 2183 /* 2184 * Remove a xmit buffer descriptor from the head of the free list and return 2185 * a pointer to it. If no buffers on list, attempt to allocate a new one. 2186 * Called with the tx_buf_mutex held. 2187 */ 2188 static struct xnf_buffer_desc * 2189 xnf_get_tx_buffer(xnf_t *xnfp) 2190 { 2191 struct xnf_buffer_desc *bdesc; 2192 2193 bdesc = xnfp->xnf_tx_free_list; 2194 if (bdesc != NULL) { 2195 xnfp->xnf_tx_free_list = bdesc->next; 2196 } else { 2197 bdesc = xnf_alloc_tx_buffer(xnfp); 2198 } 2199 return (bdesc); 2200 } 2201 2202 /* 2203 * Remove a buffer descriptor from the head of the free list and return 2204 * a pointer to it. If no buffers on list, attempt to allocate a new one. 2205 * Called with the rx_buf_mutex held. 2206 */ 2207 static struct xnf_buffer_desc * 2208 xnf_get_buffer(xnf_t *xnfp) 2209 { 2210 struct xnf_buffer_desc *bdesc; 2211 2212 bdesc = xnfp->xnf_free_list; 2213 if (bdesc != NULL) { 2214 xnfp->xnf_free_list = bdesc->next; 2215 xnfp->xnf_rx_descs_free--; 2216 } else { 2217 bdesc = xnf_alloc_buffer(xnfp); 2218 } 2219 return (bdesc); 2220 } 2221 2222 /* 2223 * Free a xmit buffer back to the xmit free list 2224 */ 2225 static void 2226 xnf_free_tx_buffer(struct xnf_buffer_desc *bp) 2227 { 2228 xnf_t *xnfp = bp->xnfp; 2229 2230 mutex_enter(&xnfp->xnf_tx_buf_mutex); 2231 bp->next = xnfp->xnf_tx_free_list; 2232 xnfp->xnf_tx_free_list = bp; 2233 mutex_exit(&xnfp->xnf_tx_buf_mutex); 2234 } 2235 2236 /* 2237 * Put a buffer descriptor onto the head of the free list. 2238 * for page-flip: 2239 * We can't really free these buffers back to the kernel 2240 * since we have given away their backing page to be used 2241 * by the back end net driver. 
2242 * for hvcopy: 2243 * release all the memory 2244 */ 2245 static void 2246 xnf_free_buffer(struct xnf_buffer_desc *bdesc) 2247 { 2248 xnf_t *xnfp = bdesc->xnfp; 2249 2250 mutex_enter(&xnfp->xnf_rx_buf_mutex); 2251 if (xnfp->xnf_rx_hvcopy) { 2252 if (ddi_dma_unbind_handle(bdesc->dma_handle) != DDI_SUCCESS) 2253 goto out; 2254 ddi_dma_mem_free(&bdesc->acc_handle); 2255 ddi_dma_free_handle(&bdesc->dma_handle); 2256 kmem_free(bdesc, sizeof (*bdesc)); 2257 xnfp->xnf_rx_buffer_count--; 2258 } else { 2259 bdesc->next = xnfp->xnf_free_list; 2260 xnfp->xnf_free_list = bdesc; 2261 xnfp->xnf_rx_descs_free++; 2262 } 2263 out: 2264 mutex_exit(&xnfp->xnf_rx_buf_mutex); 2265 } 2266 2267 /* 2268 * Allocate a DMA-able xmit buffer, including a structure to 2269 * keep track of the buffer. Called with tx_buf_mutex held. 2270 */ 2271 static struct xnf_buffer_desc * 2272 xnf_alloc_tx_buffer(xnf_t *xnfp) 2273 { 2274 struct xnf_buffer_desc *bdesc; 2275 size_t len; 2276 2277 if ((bdesc = kmem_zalloc(sizeof (*bdesc), KM_NOSLEEP)) == NULL) 2278 return (NULL); 2279 2280 /* allocate a DMA access handle for transmit buffer */ 2281 if (ddi_dma_alloc_handle(xnfp->xnf_devinfo, &tx_buffer_dma_attr, 2282 0, 0, &bdesc->dma_handle) != DDI_SUCCESS) 2283 goto failure; 2284 2285 /* Allocate DMA-able memory for transmit buffer */ 2286 if (ddi_dma_mem_alloc(bdesc->dma_handle, 2287 PAGESIZE, &data_accattr, DDI_DMA_STREAMING, 0, 0, 2288 &bdesc->buf, &len, &bdesc->acc_handle) != DDI_SUCCESS) 2289 goto failure_1; 2290 2291 bdesc->xnfp = xnfp; 2292 xnfp->xnf_tx_buffer_count++; 2293 2294 return (bdesc); 2295 2296 failure_1: 2297 ddi_dma_free_handle(&bdesc->dma_handle); 2298 2299 failure: 2300 kmem_free(bdesc, sizeof (*bdesc)); 2301 return (NULL); 2302 } 2303 2304 /* 2305 * Allocate a DMA-able receive buffer, including a structure to 2306 * keep track of the buffer. Called with rx_buf_mutex held.
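 *
 * In hvcopy mode the buffer keeps its backing page and is later
 * released via xnf_copy_rcv_complete(); in page-flip mode the
 * machine page is handed straight back to xen with
 * balloon_free_pages() so it can be used as a backend net buffer,
 * and xnf_rcv_complete() is used as the free routine instead.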
2307 */ 2308 static struct xnf_buffer_desc * 2309 xnf_alloc_buffer(xnf_t *xnfp) 2310 { 2311 struct xnf_buffer_desc *bdesc; 2312 size_t len; 2313 uint_t ncookies; 2314 ddi_dma_cookie_t dma_cookie; 2315 long cnt; 2316 pfn_t pfn; 2317 2318 if (xnfp->xnf_rx_buffer_count >= xnfp->xnf_max_rx_bufs) 2319 return (NULL); 2320 2321 if ((bdesc = kmem_zalloc(sizeof (*bdesc), KM_NOSLEEP)) == NULL) 2322 return (NULL); 2323 2324 /* allocate a DMA access handle for receive buffer */ 2325 if (ddi_dma_alloc_handle(xnfp->xnf_devinfo, &rx_buffer_dma_attr, 2326 0, 0, &bdesc->dma_handle) != DDI_SUCCESS) 2327 goto failure; 2328 2329 /* Allocate DMA-able memory for receive buffer */ 2330 if (ddi_dma_mem_alloc(bdesc->dma_handle, 2331 PAGESIZE, &data_accattr, DDI_DMA_STREAMING, 0, 0, 2332 &bdesc->buf, &len, &bdesc->acc_handle) != DDI_SUCCESS) 2333 goto failure_1; 2334 2335 /* bind to virtual address of buffer to get physical address */ 2336 if (ddi_dma_addr_bind_handle(bdesc->dma_handle, NULL, 2337 bdesc->buf, PAGESIZE, DDI_DMA_READ | DDI_DMA_STREAMING, 2338 DDI_DMA_SLEEP, 0, &dma_cookie, &ncookies) != DDI_DMA_MAPPED) 2339 goto failure_2; 2340 2341 bdesc->buf_phys = dma_cookie.dmac_laddress; 2342 bdesc->xnfp = xnfp; 2343 if (xnfp->xnf_rx_hvcopy) { 2344 bdesc->free_rtn.free_func = xnf_copy_rcv_complete; 2345 } else { 2346 bdesc->free_rtn.free_func = xnf_rcv_complete; 2347 } 2348 bdesc->free_rtn.free_arg = (char *)bdesc; 2349 bdesc->grant_ref = GRANT_INVALID_REF; 2350 ASSERT(ncookies == 1); 2351 2352 xnfp->xnf_rx_buffer_count++; 2353 2354 if (!xnfp->xnf_rx_hvcopy) { 2355 /* 2356 * Unmap the page, and hand the machine page back 2357 * to xen so it can be used as a backend net buffer. 2358 */ 2359 pfn = xnf_btop(bdesc->buf_phys); 2360 cnt = balloon_free_pages(1, NULL, bdesc->buf, &pfn); 2361 if (cnt != 1) { 2362 cmn_err(CE_WARN, "unable to give a page back to the " 2363 "hypervisor\n"); 2364 } 2365 } 2366 2367 return (bdesc); 2368 2369 failure_2: 2370 ddi_dma_mem_free(&bdesc->acc_handle); 2371 2372 failure_1: 2373 ddi_dma_free_handle(&bdesc->dma_handle); 2374 2375 failure: 2376 kmem_free(bdesc, sizeof (*bdesc)); 2377 return (NULL); 2378 } 2379 2380 /* 2381 * Statistics. 2382 */ 2383 static char *xnf_aux_statistics[] = { 2384 "tx_cksum_deferred", 2385 "rx_cksum_no_need", 2386 "interrupts", 2387 "unclaimed_interrupts", 2388 "tx_pullup", 2389 "tx_pagebndry", 2390 "tx_attempt", 2391 "rx_no_ringbuf", 2392 "hvcopy_packet_processed", 2393 }; 2394 2395 static int 2396 xnf_kstat_aux_update(kstat_t *ksp, int flag) 2397 { 2398 xnf_t *xnfp; 2399 kstat_named_t *knp; 2400 2401 if (flag != KSTAT_READ) 2402 return (EACCES); 2403 2404 xnfp = ksp->ks_private; 2405 knp = ksp->ks_data; 2406 2407 /* 2408 * Assignment order must match that of the names in 2409 * xnf_aux_statistics. 
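 *
 * To add a statistic, append its name to xnf_aux_statistics[] and
 * add a matching assignment at the same position below.  The
 * counters can be inspected from userland with kstat(1M), for
 * example (instance 0 assumed):
 *
 *	kstat xnf:0:aux_statistics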
2410 */ 2411 (knp++)->value.ui64 = xnfp->xnf_stat_tx_cksum_deferred; 2412 (knp++)->value.ui64 = xnfp->xnf_stat_rx_cksum_no_need; 2413 2414 (knp++)->value.ui64 = xnfp->xnf_stat_interrupts; 2415 (knp++)->value.ui64 = xnfp->xnf_stat_unclaimed_interrupts; 2416 (knp++)->value.ui64 = xnfp->xnf_stat_tx_pullup; 2417 (knp++)->value.ui64 = xnfp->xnf_stat_tx_pagebndry; 2418 (knp++)->value.ui64 = xnfp->xnf_stat_tx_attempt; 2419 (knp++)->value.ui64 = xnfp->xnf_stat_rx_no_ringbuf; 2420 2421 (knp++)->value.ui64 = xnfp->xnf_stat_hvcopy_packet_processed; 2422 2423 return (0); 2424 } 2425 2426 static boolean_t 2427 xnf_kstat_init(xnf_t *xnfp) 2428 { 2429 int nstat = sizeof (xnf_aux_statistics) / 2430 sizeof (xnf_aux_statistics[0]); 2431 char **cp = xnf_aux_statistics; 2432 kstat_named_t *knp; 2433 2434 /* 2435 * Create and initialise kstats. 2436 */ 2437 if ((xnfp->xnf_kstat_aux = kstat_create("xnf", 2438 ddi_get_instance(xnfp->xnf_devinfo), 2439 "aux_statistics", "net", KSTAT_TYPE_NAMED, 2440 nstat, 0)) == NULL) 2441 return (B_FALSE); 2442 2443 xnfp->xnf_kstat_aux->ks_private = xnfp; 2444 xnfp->xnf_kstat_aux->ks_update = xnf_kstat_aux_update; 2445 2446 knp = xnfp->xnf_kstat_aux->ks_data; 2447 while (nstat > 0) { 2448 kstat_named_init(knp, *cp, KSTAT_DATA_UINT64); 2449 2450 knp++; 2451 cp++; 2452 nstat--; 2453 } 2454 2455 kstat_install(xnfp->xnf_kstat_aux); 2456 2457 return (B_TRUE); 2458 } 2459 2460 static int 2461 xnf_stat(void *arg, uint_t stat, uint64_t *val) 2462 { 2463 xnf_t *xnfp = arg; 2464 2465 mutex_enter(&xnfp->xnf_intrlock); 2466 mutex_enter(&xnfp->xnf_txlock); 2467 2468 #define mac_stat(q, r) \ 2469 case (MAC_STAT_##q): \ 2470 *val = xnfp->xnf_stat_##r; \ 2471 break 2472 2473 #define ether_stat(q, r) \ 2474 case (ETHER_STAT_##q): \ 2475 *val = xnfp->xnf_stat_##r; \ 2476 break 2477 2478 switch (stat) { 2479 2480 mac_stat(IPACKETS, ipackets); 2481 mac_stat(OPACKETS, opackets); 2482 mac_stat(RBYTES, rbytes); 2483 mac_stat(OBYTES, obytes); 2484 mac_stat(NORCVBUF, norxbuf); 2485 mac_stat(IERRORS, errrx); 2486 mac_stat(NOXMTBUF, tx_defer); 2487 2488 ether_stat(MACRCV_ERRORS, mac_rcv_error); 2489 ether_stat(TOOSHORT_ERRORS, runt); 2490 2491 /* always claim to be in full duplex mode */ 2492 case ETHER_STAT_LINK_DUPLEX: 2493 *val = LINK_DUPLEX_FULL; 2494 break; 2495 2496 /* always claim to be at 1Gb/s link speed */ 2497 case MAC_STAT_IFSPEED: 2498 *val = 1000000000ull; 2499 break; 2500 2501 default: 2502 mutex_exit(&xnfp->xnf_txlock); 2503 mutex_exit(&xnfp->xnf_intrlock); 2504 2505 return (ENOTSUP); 2506 } 2507 2508 #undef mac_stat 2509 #undef ether_stat 2510 2511 mutex_exit(&xnfp->xnf_txlock); 2512 mutex_exit(&xnfp->xnf_intrlock); 2513 2514 return (0); 2515 } 2516 2517 /*ARGSUSED*/ 2518 static void 2519 xnf_ioctl(void *arg, queue_t *q, mblk_t *mp) 2520 { 2521 miocnak(q, mp, 0, EINVAL); 2522 } 2523 2524 static boolean_t 2525 xnf_getcapab(void *arg, mac_capab_t cap, void *cap_data) 2526 { 2527 xnf_t *xnfp = arg; 2528 2529 switch (cap) { 2530 case MAC_CAPAB_HCKSUM: { 2531 uint32_t *capab = cap_data; 2532 2533 /* 2534 * Whilst the flag used to communicate with the IO 2535 * domain is called "NETTXF_csum_blank", the checksum 2536 * in the packet must contain the pseudo-header 2537 * checksum and not zero. 2538 * 2539 * To help out the IO domain, we might use 2540 * HCKSUM_INET_PARTIAL. Unfortunately our stack will 2541 * then use checksum offload for IPv6 packets, which 2542 * the IO domain can't handle. 2543 * 2544 * As a result, we declare ourselves capable of 2545 * HCKSUM_INET_FULL_V4.
This means that we receive 2546 * IPv4 packets from the stack with a blank checksum 2547 * field and must insert the pseudo-header checksum 2548 * before passing the packet to the IO domain. 2549 */ 2550 if (xnfp->xnf_cksum_offload) 2551 *capab = HCKSUM_INET_FULL_V4; 2552 else 2553 *capab = 0; 2554 break; 2555 } 2556 default: 2557 return (B_FALSE); 2558 } 2559 2560 return (B_TRUE); 2561 } 2562 2563 /*ARGSUSED*/ 2564 static void 2565 oe_state_change(dev_info_t *dip, ddi_eventcookie_t id, 2566 void *arg, void *impl_data) 2567 { 2568 xnf_t *xnfp = ddi_get_driver_private(dip); 2569 XenbusState new_state = *(XenbusState *)impl_data; 2570 2571 ASSERT(xnfp != NULL); 2572 2573 switch (new_state) { 2574 case XenbusStateConnected: 2575 mutex_enter(&xnfp->xnf_intrlock); 2576 mutex_enter(&xnfp->xnf_txlock); 2577 2578 xnfp->xnf_connected = B_TRUE; 2579 /* 2580 * Wake up threads that want to send data to the backend 2581 * but were blocked because the backend was not ready. 2582 */ 2583 cv_broadcast(&xnfp->xnf_cv); 2584 2585 mutex_exit(&xnfp->xnf_txlock); 2586 mutex_exit(&xnfp->xnf_intrlock); 2587 2588 /* 2589 * Kick the backend in case it missed any tx requests 2590 * in the TX ring buffer. 2591 */ 2592 ec_notify_via_evtchn(xnfp->xnf_evtchn); 2593 2594 /* 2595 * There may already be rx data queued in the RX ring, 2596 * sent by the backend after it became connected but before 2597 * we saw its state change here, so call our interrupt 2598 * handling routine to process it, if any. 2599 */ 2600 (void) xnf_intr((caddr_t)xnfp); 2601 2602 /* mark the link as up now that we are connected */ 2603 mac_link_update(xnfp->xnf_mh, LINK_STATE_UP); 2604 2605 break; 2606 2607 default: 2608 break; 2609 } 2610 } 2611 2612 /* 2613 * Check whether the backend is capable of and willing to talk 2614 * to us via hypervisor copy, as opposed to page flip. 2615 */ 2616 static boolean_t 2617 xnf_hvcopy_peer_status(dev_info_t *devinfo) 2618 { 2619 int be_rx_copy; 2620 int err; 2621 2622 err = xenbus_scanf(XBT_NULL, xvdi_get_oename(devinfo), 2623 "feature-rx-copy", "%d", &be_rx_copy); 2624 /* 2625 * If we fail to read the store, we assume that the key is 2626 * absent, implying an older domain at the far end. Older 2627 * domains cannot do HV copy (we assume ..). 2628 */ 2629 if (err != 0) 2630 be_rx_copy = 0; 2631 2632 return (be_rx_copy?B_TRUE:B_FALSE); 2633 } 2634
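/*
 * Illustrative sketch only, not part of the driver proper: a caller
 * such as the attach path might combine the backend capability
 * reported by xnf_hvcopy_peer_status() with the xnf_rx_hvcopy
 * tunable along the following lines.  The XNF_EXAMPLE_USAGE guard
 * and the helper name are hypothetical and never compiled in.
 */
#ifdef XNF_EXAMPLE_USAGE
static void
xnf_example_select_rx_mode(xnf_t *xnfp, dev_info_t *devinfo)
{
	/*
	 * Use hypervisor copy only if the administrator has left
	 * xnf_rx_hvcopy enabled and the backend advertises
	 * "feature-rx-copy"; otherwise fall back to page flipping.
	 */
	if (xnf_rx_hvcopy && xnf_hvcopy_peer_status(devinfo))
		xnfp->xnf_rx_hvcopy = B_TRUE;
	else
		xnfp->xnf_rx_hvcopy = B_FALSE;
}
#endif	/* XNF_EXAMPLE_USAGE */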