/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 *
 * Copyright (c) 2004 Christian Limpach.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. This section intentionally left blank.
 * 4. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
/*
 * Section 3 of the above license was updated in response to bug 6379571.
 */

/*
 * xnf.c - Nemo-based network driver for domU
 */

#include <sys/types.h>
#include <sys/errno.h>
#include <sys/param.h>
#include <sys/sysmacros.h>
#include <sys/systm.h>
#include <sys/stream.h>
#include <sys/strsubr.h>
#include <sys/conf.h>
#include <sys/ddi.h>
#include <sys/devops.h>
#include <sys/sunddi.h>
#include <sys/sunndi.h>
#include <sys/dlpi.h>
#include <sys/ethernet.h>
#include <sys/strsun.h>
#include <sys/pattr.h>
#include <inet/ip.h>
#include <inet/ip_impl.h>
#include <sys/gld.h>
#include <sys/modctl.h>
#include <sys/mac.h>
#include <sys/mac_ether.h>
#include <sys/bootinfo.h>
#include <sys/mach_mmu.h>
#ifdef XPV_HVM_DRIVER
#include <sys/xpv_support.h>
#include <sys/hypervisor.h>
#else
#include <sys/hypervisor.h>
#include <sys/evtchn_impl.h>
#include <sys/balloon_impl.h>
#endif
#include <xen/public/io/netif.h>
#include <sys/gnttab.h>
#include <xen/sys/xendev.h>
#include <sys/sdt.h>

#include <io/xnf.h>


/*
 * Declarations and Module Linkage
 */

#define	IDENT	"Virtual Ethernet driver"

#if defined(DEBUG) || defined(__lint)
#define	XNF_DEBUG
int	xnfdebug = 0;
#endif

/*
 * On a 32 bit PAE system physical and machine addresses are larger
 * than 32 bits.  ddi_btop() on such systems takes an unsigned long
 * argument, and so addresses above 4G are truncated before ddi_btop()
 * gets to see them.  To avoid this, code the shift operation here.
 */
#define	xnf_btop(addr)	((addr) >> PAGESHIFT)

boolean_t	xnf_cksum_offload = B_TRUE;

/* Default value for hypervisor-based copy operations */
boolean_t	xnf_rx_hvcopy = B_TRUE;

/*
 * Should pages used for transmit be readonly for the peer?
 */
boolean_t	xnf_tx_pages_readonly = B_FALSE;
/*
 * Packets under this size are bcopied instead of using desballoc.
 * Choose a value > XNF_FRAMESIZE (1514) to force the receive path to
 * always copy.
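 *
 * Copying keeps the receive buffer in the driver; the desballoc()
 * path loans the buffer upstream and it is only returned when the
 * stack frees the mblk (see xnf_rcv_complete()).  Like the other
 * patchable globals above, this can be overridden from /etc/system,
 * for example (assuming the usual module:variable syntax):
 *
 *	set xnf:xnf_rx_bcopy_thresh = 2048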
135 */ 136 unsigned int xnf_rx_bcopy_thresh = 64; 137 138 unsigned int xnf_max_tx_frags = 1; 139 140 /* Required system entry points */ 141 static int xnf_attach(dev_info_t *, ddi_attach_cmd_t); 142 static int xnf_detach(dev_info_t *, ddi_detach_cmd_t); 143 144 /* Required driver entry points for Nemo */ 145 static int xnf_start(void *); 146 static void xnf_stop(void *); 147 static int xnf_set_mac_addr(void *, const uint8_t *); 148 static int xnf_set_multicast(void *, boolean_t, const uint8_t *); 149 static int xnf_set_promiscuous(void *, boolean_t); 150 static mblk_t *xnf_send(void *, mblk_t *); 151 static uint_t xnf_intr(caddr_t); 152 static int xnf_stat(void *, uint_t, uint64_t *); 153 static void xnf_blank(void *, time_t, uint_t); 154 static void xnf_resources(void *); 155 static void xnf_ioctl(void *, queue_t *, mblk_t *); 156 static boolean_t xnf_getcapab(void *, mac_capab_t, void *); 157 158 /* Driver private functions */ 159 static int xnf_alloc_dma_resources(xnf_t *); 160 static void xnf_release_dma_resources(xnf_t *); 161 static mblk_t *xnf_process_recv(xnf_t *); 162 static void xnf_rcv_complete(struct xnf_buffer_desc *); 163 static void xnf_release_mblks(xnf_t *); 164 static struct xnf_buffer_desc *xnf_alloc_tx_buffer(xnf_t *); 165 static struct xnf_buffer_desc *xnf_alloc_buffer(xnf_t *); 166 static struct xnf_buffer_desc *xnf_get_tx_buffer(xnf_t *); 167 static struct xnf_buffer_desc *xnf_get_buffer(xnf_t *); 168 static void xnf_free_buffer(struct xnf_buffer_desc *); 169 static void xnf_free_tx_buffer(struct xnf_buffer_desc *); 170 void xnf_send_driver_status(int, int); 171 static void rx_buffer_hang(xnf_t *, struct xnf_buffer_desc *); 172 static int xnf_clean_tx_ring(xnf_t *); 173 static void oe_state_change(dev_info_t *, ddi_eventcookie_t, 174 void *, void *); 175 static mblk_t *xnf_process_hvcopy_recv(xnf_t *xnfp); 176 static boolean_t xnf_hvcopy_peer_status(dev_info_t *devinfo); 177 static boolean_t xnf_kstat_init(xnf_t *xnfp); 178 179 /* 180 * XXPV dme: remove MC_IOCTL? 
181 */ 182 static mac_callbacks_t xnf_callbacks = { 183 MC_RESOURCES | MC_IOCTL | MC_GETCAPAB, 184 xnf_stat, 185 xnf_start, 186 xnf_stop, 187 xnf_set_promiscuous, 188 xnf_set_multicast, 189 xnf_set_mac_addr, 190 xnf_send, 191 xnf_resources, 192 xnf_ioctl, 193 xnf_getcapab 194 }; 195 196 #define GRANT_INVALID_REF 0 197 const int xnf_rx_bufs_lowat = 4 * NET_RX_RING_SIZE; 198 const int xnf_rx_bufs_hiwat = 8 * NET_RX_RING_SIZE; /* default max */ 199 200 /* DMA attributes for network ring buffer */ 201 static ddi_dma_attr_t ringbuf_dma_attr = { 202 DMA_ATTR_V0, /* version of this structure */ 203 0, /* lowest usable address */ 204 0xffffffffffffffffULL, /* highest usable address */ 205 0x7fffffff, /* maximum DMAable byte count */ 206 MMU_PAGESIZE, /* alignment in bytes */ 207 0x7ff, /* bitmap of burst sizes */ 208 1, /* minimum transfer */ 209 0xffffffffU, /* maximum transfer */ 210 0xffffffffffffffffULL, /* maximum segment length */ 211 1, /* maximum number of segments */ 212 1, /* granularity */ 213 0, /* flags (reserved) */ 214 }; 215 216 /* DMA attributes for transmit data */ 217 static ddi_dma_attr_t tx_buffer_dma_attr = { 218 DMA_ATTR_V0, /* version of this structure */ 219 0, /* lowest usable address */ 220 0xffffffffffffffffULL, /* highest usable address */ 221 0x7fffffff, /* maximum DMAable byte count */ 222 MMU_PAGESIZE, /* alignment in bytes */ 223 0x7ff, /* bitmap of burst sizes */ 224 1, /* minimum transfer */ 225 0xffffffffU, /* maximum transfer */ 226 0xffffffffffffffffULL, /* maximum segment length */ 227 1, /* maximum number of segments */ 228 1, /* granularity */ 229 0, /* flags (reserved) */ 230 }; 231 232 /* DMA attributes for a receive buffer */ 233 static ddi_dma_attr_t rx_buffer_dma_attr = { 234 DMA_ATTR_V0, /* version of this structure */ 235 0, /* lowest usable address */ 236 0xffffffffffffffffULL, /* highest usable address */ 237 0x7fffffff, /* maximum DMAable byte count */ 238 MMU_PAGESIZE, /* alignment in bytes */ 239 0x7ff, /* bitmap of burst sizes */ 240 1, /* minimum transfer */ 241 0xffffffffU, /* maximum transfer */ 242 0xffffffffffffffffULL, /* maximum segment length */ 243 1, /* maximum number of segments */ 244 1, /* granularity */ 245 0, /* flags (reserved) */ 246 }; 247 248 /* DMA access attributes for registers and descriptors */ 249 static ddi_device_acc_attr_t accattr = { 250 DDI_DEVICE_ATTR_V0, 251 DDI_STRUCTURE_LE_ACC, /* This is a little-endian device */ 252 DDI_STRICTORDER_ACC 253 }; 254 255 /* DMA access attributes for data: NOT to be byte swapped. 
*/ 256 static ddi_device_acc_attr_t data_accattr = { 257 DDI_DEVICE_ATTR_V0, 258 DDI_NEVERSWAP_ACC, 259 DDI_STRICTORDER_ACC 260 }; 261 262 unsigned char xnf_broadcastaddr[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; 263 int xnf_diagnose = 0; /* Patchable global for diagnostic purposes */ 264 265 DDI_DEFINE_STREAM_OPS(xnf_dev_ops, nulldev, nulldev, xnf_attach, xnf_detach, 266 nodev, NULL, D_MP, NULL); 267 268 static struct modldrv xnf_modldrv = { 269 &mod_driverops, 270 "Virtual Ethernet driver", 271 &xnf_dev_ops 272 }; 273 274 static struct modlinkage modlinkage = { 275 MODREV_1, &xnf_modldrv, NULL 276 }; 277 278 int 279 _init(void) 280 { 281 int r; 282 283 mac_init_ops(&xnf_dev_ops, "xnf"); 284 r = mod_install(&modlinkage); 285 if (r != DDI_SUCCESS) 286 mac_fini_ops(&xnf_dev_ops); 287 288 return (r); 289 } 290 291 int 292 _fini(void) 293 { 294 return (EBUSY); /* XXPV dme: should be removable */ 295 } 296 297 int 298 _info(struct modinfo *modinfop) 299 { 300 return (mod_info(&modlinkage, modinfop)); 301 } 302 303 static int 304 xnf_setup_rings(xnf_t *xnfp) 305 { 306 int ix, err; 307 RING_IDX i; 308 struct xnf_buffer_desc *bdesc, *rbp; 309 struct xenbus_device *xsd; 310 domid_t oeid; 311 312 oeid = xvdi_get_oeid(xnfp->xnf_devinfo); 313 xsd = xvdi_get_xsd(xnfp->xnf_devinfo); 314 315 if (xnfp->xnf_tx_ring_ref != GRANT_INVALID_REF) 316 gnttab_end_foreign_access(xnfp->xnf_tx_ring_ref, 0, 0); 317 318 err = gnttab_grant_foreign_access(oeid, 319 xnf_btop(pa_to_ma(xnfp->xnf_tx_ring_phys_addr)), 0); 320 if (err <= 0) { 321 err = -err; 322 xenbus_dev_error(xsd, err, "granting access to tx ring page"); 323 goto out; 324 } 325 xnfp->xnf_tx_ring_ref = (grant_ref_t)err; 326 327 if (xnfp->xnf_rx_ring_ref != GRANT_INVALID_REF) 328 gnttab_end_foreign_access(xnfp->xnf_rx_ring_ref, 0, 0); 329 330 err = gnttab_grant_foreign_access(oeid, 331 xnf_btop(pa_to_ma(xnfp->xnf_rx_ring_phys_addr)), 0); 332 if (err <= 0) { 333 err = -err; 334 xenbus_dev_error(xsd, err, "granting access to rx ring page"); 335 goto out; 336 } 337 xnfp->xnf_rx_ring_ref = (grant_ref_t)err; 338 339 340 mutex_enter(&xnfp->xnf_intrlock); 341 342 /* 343 * Cleanup the TX ring. We just clean up any valid tx_pktinfo structs 344 * and reset the ring. Note that this can lose packets after a resume, 345 * but we expect to stagger on. 346 */ 347 mutex_enter(&xnfp->xnf_txlock); 348 349 for (i = 0; i < xnfp->xnf_n_tx; i++) { 350 struct tx_pktinfo *txp = &xnfp->xnf_tx_pkt_info[i]; 351 352 txp->id = i + 1; 353 354 if (txp->grant_ref == GRANT_INVALID_REF) { 355 ASSERT(txp->mp == NULL); 356 ASSERT(txp->bdesc == NULL); 357 continue; 358 } 359 360 if (gnttab_query_foreign_access(txp->grant_ref) != 0) 361 panic("tx grant still in use by backend domain"); 362 363 freemsg(txp->mp); 364 txp->mp = NULL; 365 366 (void) ddi_dma_unbind_handle(txp->dma_handle); 367 368 if (txp->bdesc != NULL) { 369 xnf_free_tx_buffer(txp->bdesc); 370 txp->bdesc = NULL; 371 } 372 373 (void) gnttab_end_foreign_access_ref(txp->grant_ref, 374 xnfp->xnf_tx_pages_readonly); 375 gnttab_release_grant_reference(&xnfp->xnf_gref_tx_head, 376 txp->grant_ref); 377 txp->grant_ref = GRANT_INVALID_REF; 378 } 379 380 xnfp->xnf_tx_pkt_id_list = 0; 381 xnfp->xnf_tx_ring.rsp_cons = 0; 382 xnfp->xnf_tx_ring.req_prod_pvt = 0; 383 384 /* LINTED: constant in conditional context */ 385 SHARED_RING_INIT(xnfp->xnf_tx_ring.sring); 386 387 mutex_exit(&xnfp->xnf_txlock); 388 389 /* 390 * Rebuild the RX ring. 
We have to rebuild the RX ring because some of 391 * our pages are currently flipped out/granted so we can't just free 392 * the RX buffers. Reclaim any unprocessed recv buffers, they won't be 393 * useable anyway since the mfn's they refer to are no longer valid. 394 * Grant the backend domain access to each hung rx buffer. 395 */ 396 i = xnfp->xnf_rx_ring.rsp_cons; 397 while (i++ != xnfp->xnf_rx_ring.sring->req_prod) { 398 volatile netif_rx_request_t *rxrp; 399 400 rxrp = RING_GET_REQUEST(&xnfp->xnf_rx_ring, i); 401 ix = rxrp - RING_GET_REQUEST(&xnfp->xnf_rx_ring, 0); 402 rbp = xnfp->xnf_rxpkt_bufptr[ix]; 403 if (rbp != NULL) { 404 grant_ref_t ref = rbp->grant_ref; 405 406 ASSERT(ref != GRANT_INVALID_REF); 407 if (xnfp->xnf_rx_hvcopy) { 408 pfn_t pfn = xnf_btop(rbp->buf_phys); 409 mfn_t mfn = pfn_to_mfn(pfn); 410 411 gnttab_grant_foreign_access_ref(ref, oeid, 412 mfn, 0); 413 } else { 414 gnttab_grant_foreign_transfer_ref(ref, 415 oeid, 0); 416 } 417 rxrp->id = ix; 418 rxrp->gref = ref; 419 } 420 } 421 422 /* 423 * Reset the ring pointers to initial state. 424 * Hang buffers for any empty ring slots. 425 */ 426 xnfp->xnf_rx_ring.rsp_cons = 0; 427 xnfp->xnf_rx_ring.req_prod_pvt = 0; 428 429 /* LINTED: constant in conditional context */ 430 SHARED_RING_INIT(xnfp->xnf_rx_ring.sring); 431 432 for (i = 0; i < NET_RX_RING_SIZE; i++) { 433 xnfp->xnf_rx_ring.req_prod_pvt = i; 434 if (xnfp->xnf_rxpkt_bufptr[i] != NULL) 435 continue; 436 if ((bdesc = xnf_get_buffer(xnfp)) == NULL) 437 break; 438 rx_buffer_hang(xnfp, bdesc); 439 } 440 xnfp->xnf_rx_ring.req_prod_pvt = i; 441 /* LINTED: constant in conditional context */ 442 RING_PUSH_REQUESTS(&xnfp->xnf_rx_ring); 443 444 mutex_exit(&xnfp->xnf_intrlock); 445 446 return (0); 447 448 out: 449 if (xnfp->xnf_tx_ring_ref != GRANT_INVALID_REF) 450 gnttab_end_foreign_access(xnfp->xnf_tx_ring_ref, 0, 0); 451 xnfp->xnf_tx_ring_ref = GRANT_INVALID_REF; 452 453 if (xnfp->xnf_rx_ring_ref != GRANT_INVALID_REF) 454 gnttab_end_foreign_access(xnfp->xnf_rx_ring_ref, 0, 0); 455 xnfp->xnf_rx_ring_ref = GRANT_INVALID_REF; 456 457 return (err); 458 } 459 460 461 /* Called when the upper layers free a message we passed upstream */ 462 static void 463 xnf_copy_rcv_complete(struct xnf_buffer_desc *bdesc) 464 { 465 (void) ddi_dma_unbind_handle(bdesc->dma_handle); 466 ddi_dma_mem_free(&bdesc->acc_handle); 467 ddi_dma_free_handle(&bdesc->dma_handle); 468 kmem_free(bdesc, sizeof (*bdesc)); 469 } 470 471 472 /* 473 * Connect driver to back end, called to set up communication with 474 * back end driver both initially and on resume after restore/migrate. 
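 *
 * The handshake is a single xenstore transaction: the tx/rx ring
 * grant references, the event channel and the feature flags
 * (feature-rx-notify, feature-no-csum-offload, request-rx-copy and,
 * optionally, feature-tx-writable) are written, and the frontend
 * state is then set to XenbusStateConnected.  If ending the
 * transaction returns EAGAIN the whole sequence is retried.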
475 */ 476 void 477 xnf_be_connect(xnf_t *xnfp) 478 { 479 const char *message; 480 xenbus_transaction_t xbt; 481 struct xenbus_device *xsd; 482 char *xsname; 483 int err; 484 485 ASSERT(!xnfp->xnf_connected); 486 487 xsd = xvdi_get_xsd(xnfp->xnf_devinfo); 488 xsname = xvdi_get_xsname(xnfp->xnf_devinfo); 489 490 err = xnf_setup_rings(xnfp); 491 if (err != 0) { 492 cmn_err(CE_WARN, "failed to set up tx/rx rings"); 493 xenbus_dev_error(xsd, err, "setting up ring"); 494 return; 495 } 496 497 again: 498 err = xenbus_transaction_start(&xbt); 499 if (err != 0) { 500 xenbus_dev_error(xsd, EIO, "starting transaction"); 501 return; 502 } 503 504 err = xenbus_printf(xbt, xsname, "tx-ring-ref", "%u", 505 xnfp->xnf_tx_ring_ref); 506 if (err != 0) { 507 message = "writing tx ring-ref"; 508 goto abort_transaction; 509 } 510 511 err = xenbus_printf(xbt, xsname, "rx-ring-ref", "%u", 512 xnfp->xnf_rx_ring_ref); 513 if (err != 0) { 514 message = "writing rx ring-ref"; 515 goto abort_transaction; 516 } 517 518 err = xenbus_printf(xbt, xsname, "event-channel", "%u", 519 xnfp->xnf_evtchn); 520 if (err != 0) { 521 message = "writing event-channel"; 522 goto abort_transaction; 523 } 524 525 err = xenbus_printf(xbt, xsname, "feature-rx-notify", "%d", 1); 526 if (err != 0) { 527 message = "writing feature-rx-notify"; 528 goto abort_transaction; 529 } 530 531 if (!xnfp->xnf_tx_pages_readonly) { 532 err = xenbus_printf(xbt, xsname, "feature-tx-writable", 533 "%d", 1); 534 if (err != 0) { 535 message = "writing feature-tx-writable"; 536 goto abort_transaction; 537 } 538 } 539 540 err = xenbus_printf(xbt, xsname, "feature-no-csum-offload", "%d", 541 xnfp->xnf_cksum_offload ? 0 : 1); 542 if (err != 0) { 543 message = "writing feature-no-csum-offload"; 544 goto abort_transaction; 545 } 546 err = xenbus_printf(xbt, xsname, "request-rx-copy", "%d", 547 xnfp->xnf_rx_hvcopy ? 1 : 0); 548 if (err != 0) { 549 message = "writing request-rx-copy"; 550 goto abort_transaction; 551 } 552 553 err = xenbus_printf(xbt, xsname, "state", "%d", XenbusStateConnected); 554 if (err != 0) { 555 message = "writing frontend XenbusStateConnected"; 556 goto abort_transaction; 557 } 558 559 err = xenbus_transaction_end(xbt, 0); 560 if (err != 0) { 561 if (err == EAGAIN) 562 goto again; 563 xenbus_dev_error(xsd, err, "completing transaction"); 564 } 565 566 return; 567 568 abort_transaction: 569 (void) xenbus_transaction_end(xbt, 1); 570 xenbus_dev_error(xsd, err, "%s", message); 571 } 572 573 /* 574 * Read config info from xenstore 575 */ 576 void 577 xnf_read_config(xnf_t *xnfp) 578 { 579 char mac[ETHERADDRL * 3]; 580 int err, be_no_cksum_offload; 581 582 err = xenbus_scanf(XBT_NULL, xvdi_get_oename(xnfp->xnf_devinfo), "mac", 583 "%s", (char *)&mac[0]); 584 if (err != 0) { 585 /* 586 * bad: we're supposed to be set up with a proper mac 587 * addr. at this point 588 */ 589 cmn_err(CE_WARN, "%s%d: no mac address", 590 ddi_driver_name(xnfp->xnf_devinfo), 591 ddi_get_instance(xnfp->xnf_devinfo)); 592 return; 593 } 594 if (ether_aton(mac, xnfp->xnf_mac_addr) != ETHERADDRL) { 595 err = ENOENT; 596 xenbus_dev_error(xvdi_get_xsd(xnfp->xnf_devinfo), ENOENT, 597 "parsing %s/mac", xvdi_get_xsname(xnfp->xnf_devinfo)); 598 return; 599 } 600 601 err = xenbus_scanf(XBT_NULL, xvdi_get_oename(xnfp->xnf_devinfo), 602 "feature-no-csum-offload", "%d", &be_no_cksum_offload); 603 /* 604 * If we fail to read the store we assume that the key is 605 * absent, implying an older domain at the far end. Older 606 * domains always support checksum offload. 
607 */ 608 if (err != 0) 609 be_no_cksum_offload = 0; 610 /* 611 * If the far end cannot do checksum offload or we do not wish 612 * to do it, disable it. 613 */ 614 if ((be_no_cksum_offload == 1) || !xnfp->xnf_cksum_offload) 615 xnfp->xnf_cksum_offload = B_FALSE; 616 } 617 618 /* 619 * attach(9E) -- Attach a device to the system 620 * 621 * Called once for each board successfully probed. 622 */ 623 static int 624 xnf_attach(dev_info_t *devinfo, ddi_attach_cmd_t cmd) 625 { 626 mac_register_t *macp; 627 xnf_t *xnfp; 628 int err; 629 630 #ifdef XNF_DEBUG 631 if (xnfdebug & XNF_DEBUG_DDI) 632 printf("xnf%d: attach(0x%p)\n", ddi_get_instance(devinfo), 633 (void *)devinfo); 634 #endif 635 636 switch (cmd) { 637 case DDI_RESUME: 638 xnfp = ddi_get_driver_private(devinfo); 639 640 (void) xvdi_resume(devinfo); 641 (void) xvdi_alloc_evtchn(devinfo); 642 xnfp->xnf_evtchn = xvdi_get_evtchn(devinfo); 643 #ifdef XPV_HVM_DRIVER 644 ec_bind_evtchn_to_handler(xnfp->xnf_evtchn, IPL_VIF, xnf_intr, 645 xnfp); 646 #else 647 (void) ddi_add_intr(devinfo, 0, NULL, NULL, xnf_intr, 648 (caddr_t)xnfp); 649 #endif 650 xnf_be_connect(xnfp); 651 /* 652 * Our MAC address may have changed if we're resuming: 653 * - on a different host 654 * - on the same one and got a different MAC address 655 * because we didn't specify one of our own. 656 * so it's useful to claim that it changed in order that 657 * IP send out a gratuitous ARP. 658 */ 659 mac_unicst_update(xnfp->xnf_mh, xnfp->xnf_mac_addr); 660 return (DDI_SUCCESS); 661 662 case DDI_ATTACH: 663 break; 664 665 default: 666 return (DDI_FAILURE); 667 } 668 669 /* 670 * Allocate gld_mac_info_t and xnf_instance structures 671 */ 672 macp = mac_alloc(MAC_VERSION); 673 if (macp == NULL) 674 return (DDI_FAILURE); 675 xnfp = kmem_zalloc(sizeof (*xnfp), KM_SLEEP); 676 677 macp->m_dip = devinfo; 678 macp->m_driver = xnfp; 679 xnfp->xnf_devinfo = devinfo; 680 681 macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER; 682 macp->m_src_addr = xnfp->xnf_mac_addr; 683 macp->m_callbacks = &xnf_callbacks; 684 macp->m_min_sdu = 0; 685 macp->m_max_sdu = XNF_MAXPKT; 686 687 xnfp->xnf_running = B_FALSE; 688 xnfp->xnf_connected = B_FALSE; 689 xnfp->xnf_cksum_offload = xnf_cksum_offload; 690 xnfp->xnf_tx_pages_readonly = xnf_tx_pages_readonly; 691 692 xnfp->xnf_rx_hvcopy = xnf_hvcopy_peer_status(devinfo) && xnf_rx_hvcopy; 693 #ifdef XPV_HVM_DRIVER 694 /* 695 * Report our version to dom0. 696 */ 697 if (xenbus_printf(XBT_NULL, "hvmpv/xnf", "version", "%d", 698 HVMPV_XNF_VERS)) 699 cmn_err(CE_WARN, "xnf: couldn't write version\n"); 700 701 if (!xnfp->xnf_rx_hvcopy) { 702 cmn_err(CE_WARN, "The xnf driver requires a dom0 that " 703 "supports 'feature-rx-copy'"); 704 goto failure; 705 } 706 #endif 707 708 /* 709 * Get the iblock cookie with which to initialize the mutexes. 710 */ 711 if (ddi_get_iblock_cookie(devinfo, 0, &xnfp->xnf_icookie) 712 != DDI_SUCCESS) 713 goto failure; 714 /* 715 * Driver locking strategy: the txlock protects all paths 716 * through the driver, except the interrupt thread. 717 * If the interrupt thread needs to do something which could 718 * affect the operation of any other part of the driver, 719 * it needs to acquire the txlock mutex. 
720 */ 721 mutex_init(&xnfp->xnf_tx_buf_mutex, 722 NULL, MUTEX_DRIVER, xnfp->xnf_icookie); 723 mutex_init(&xnfp->xnf_rx_buf_mutex, 724 NULL, MUTEX_DRIVER, xnfp->xnf_icookie); 725 mutex_init(&xnfp->xnf_txlock, 726 NULL, MUTEX_DRIVER, xnfp->xnf_icookie); 727 mutex_init(&xnfp->xnf_intrlock, 728 NULL, MUTEX_DRIVER, xnfp->xnf_icookie); 729 cv_init(&xnfp->xnf_cv, NULL, CV_DEFAULT, NULL); 730 731 xnfp->xnf_gref_tx_head = (grant_ref_t)-1; 732 xnfp->xnf_gref_rx_head = (grant_ref_t)-1; 733 if (gnttab_alloc_grant_references(NET_TX_RING_SIZE, 734 &xnfp->xnf_gref_tx_head) < 0) { 735 cmn_err(CE_WARN, "xnf%d: can't alloc tx grant refs", 736 ddi_get_instance(xnfp->xnf_devinfo)); 737 goto failure_1; 738 } 739 if (gnttab_alloc_grant_references(NET_RX_RING_SIZE, 740 &xnfp->xnf_gref_rx_head) < 0) { 741 cmn_err(CE_WARN, "xnf%d: can't alloc rx grant refs", 742 ddi_get_instance(xnfp->xnf_devinfo)); 743 goto failure_1; 744 } 745 if (xnf_alloc_dma_resources(xnfp) == DDI_FAILURE) { 746 cmn_err(CE_WARN, "xnf%d: failed to allocate and initialize " 747 "driver data structures", 748 ddi_get_instance(xnfp->xnf_devinfo)); 749 goto failure_1; 750 } 751 752 xnfp->xnf_rx_ring.sring->rsp_event = 753 xnfp->xnf_tx_ring.sring->rsp_event = 1; 754 755 xnfp->xnf_tx_ring_ref = GRANT_INVALID_REF; 756 xnfp->xnf_rx_ring_ref = GRANT_INVALID_REF; 757 758 /* set driver private pointer now */ 759 ddi_set_driver_private(devinfo, xnfp); 760 761 if (xvdi_add_event_handler(devinfo, XS_OE_STATE, oe_state_change) 762 != DDI_SUCCESS) 763 goto failure_1; 764 765 if (!xnf_kstat_init(xnfp)) 766 goto failure_2; 767 768 /* 769 * Allocate an event channel, add the interrupt handler and 770 * bind it to the event channel. 771 */ 772 (void) xvdi_alloc_evtchn(devinfo); 773 xnfp->xnf_evtchn = xvdi_get_evtchn(devinfo); 774 #ifdef XPV_HVM_DRIVER 775 ec_bind_evtchn_to_handler(xnfp->xnf_evtchn, IPL_VIF, xnf_intr, xnfp); 776 #else 777 (void) ddi_add_intr(devinfo, 0, NULL, NULL, xnf_intr, (caddr_t)xnfp); 778 #endif 779 780 xnf_read_config(xnfp); 781 err = mac_register(macp, &xnfp->xnf_mh); 782 mac_free(macp); 783 macp = NULL; 784 if (err != 0) 785 goto failure_3; 786 787 #ifdef XPV_HVM_DRIVER 788 /* 789 * In the HVM case, this driver essentially replaces a driver for 790 * a 'real' PCI NIC. Without the "model" property set to 791 * "Ethernet controller", like the PCI code does, netbooting does 792 * not work correctly, as strplumb_get_netdev_path() will not find 793 * this interface. 
794 */ 795 (void) ndi_prop_update_string(DDI_DEV_T_NONE, devinfo, "model", 796 "Ethernet controller"); 797 #endif 798 799 /* 800 * connect to the backend 801 */ 802 xnf_be_connect(xnfp); 803 804 return (DDI_SUCCESS); 805 806 failure_3: 807 kstat_delete(xnfp->xnf_kstat_aux); 808 #ifdef XPV_HVM_DRIVER 809 ec_unbind_evtchn(xnfp->xnf_evtchn); 810 xvdi_free_evtchn(devinfo); 811 #else 812 ddi_remove_intr(devinfo, 0, xnfp->xnf_icookie); 813 #endif 814 xnfp->xnf_evtchn = INVALID_EVTCHN; 815 816 failure_2: 817 xvdi_remove_event_handler(devinfo, XS_OE_STATE); 818 819 failure_1: 820 if (xnfp->xnf_gref_tx_head != (grant_ref_t)-1) 821 gnttab_free_grant_references(xnfp->xnf_gref_tx_head); 822 if (xnfp->xnf_gref_rx_head != (grant_ref_t)-1) 823 gnttab_free_grant_references(xnfp->xnf_gref_rx_head); 824 xnf_release_dma_resources(xnfp); 825 cv_destroy(&xnfp->xnf_cv); 826 mutex_destroy(&xnfp->xnf_rx_buf_mutex); 827 mutex_destroy(&xnfp->xnf_txlock); 828 mutex_destroy(&xnfp->xnf_intrlock); 829 830 failure: 831 kmem_free(xnfp, sizeof (*xnfp)); 832 if (macp != NULL) 833 mac_free(macp); 834 835 return (DDI_FAILURE); 836 } 837 838 /* detach(9E) -- Detach a device from the system */ 839 static int 840 xnf_detach(dev_info_t *devinfo, ddi_detach_cmd_t cmd) 841 { 842 xnf_t *xnfp; /* Our private device info */ 843 int i; 844 845 #ifdef XNF_DEBUG 846 if (xnfdebug & XNF_DEBUG_DDI) 847 printf("xnf_detach(0x%p)\n", (void *)devinfo); 848 #endif 849 850 xnfp = ddi_get_driver_private(devinfo); 851 852 switch (cmd) { 853 case DDI_SUSPEND: 854 #ifdef XPV_HVM_DRIVER 855 ec_unbind_evtchn(xnfp->xnf_evtchn); 856 xvdi_free_evtchn(devinfo); 857 #else 858 ddi_remove_intr(devinfo, 0, xnfp->xnf_icookie); 859 #endif 860 861 xvdi_suspend(devinfo); 862 863 mutex_enter(&xnfp->xnf_intrlock); 864 mutex_enter(&xnfp->xnf_txlock); 865 866 xnfp->xnf_evtchn = INVALID_EVTCHN; 867 xnfp->xnf_connected = B_FALSE; 868 mutex_exit(&xnfp->xnf_txlock); 869 mutex_exit(&xnfp->xnf_intrlock); 870 return (DDI_SUCCESS); 871 872 case DDI_DETACH: 873 break; 874 875 default: 876 return (DDI_FAILURE); 877 } 878 879 if (xnfp->xnf_connected) 880 return (DDI_FAILURE); 881 882 /* Wait for receive buffers to be returned; give up after 5 seconds */ 883 i = 50; 884 885 mutex_enter(&xnfp->xnf_rx_buf_mutex); 886 while (xnfp->xnf_rx_bufs_outstanding > 0) { 887 mutex_exit(&xnfp->xnf_rx_buf_mutex); 888 delay(drv_usectohz(100000)); 889 if (--i == 0) { 890 cmn_err(CE_WARN, 891 "xnf%d: never reclaimed all the " 892 "receive buffers. 
Still have %d " 893 "buffers outstanding.", 894 ddi_get_instance(xnfp->xnf_devinfo), 895 xnfp->xnf_rx_bufs_outstanding); 896 return (DDI_FAILURE); 897 } 898 mutex_enter(&xnfp->xnf_rx_buf_mutex); 899 } 900 mutex_exit(&xnfp->xnf_rx_buf_mutex); 901 902 if (mac_unregister(xnfp->xnf_mh) != 0) 903 return (DDI_FAILURE); 904 905 kstat_delete(xnfp->xnf_kstat_aux); 906 907 /* Stop the receiver */ 908 xnf_stop(xnfp); 909 910 xvdi_remove_event_handler(devinfo, XS_OE_STATE); 911 912 /* Remove the interrupt */ 913 #ifdef XPV_HVM_DRIVER 914 ec_unbind_evtchn(xnfp->xnf_evtchn); 915 xvdi_free_evtchn(devinfo); 916 #else 917 ddi_remove_intr(devinfo, 0, xnfp->xnf_icookie); 918 #endif 919 920 /* Release any pending xmit mblks */ 921 xnf_release_mblks(xnfp); 922 923 /* Release all DMA resources */ 924 xnf_release_dma_resources(xnfp); 925 926 cv_destroy(&xnfp->xnf_cv); 927 mutex_destroy(&xnfp->xnf_rx_buf_mutex); 928 mutex_destroy(&xnfp->xnf_txlock); 929 mutex_destroy(&xnfp->xnf_intrlock); 930 931 kmem_free(xnfp, sizeof (*xnfp)); 932 933 return (DDI_SUCCESS); 934 } 935 936 /* 937 * xnf_set_mac_addr() -- set the physical network address on the board. 938 */ 939 /*ARGSUSED*/ 940 static int 941 xnf_set_mac_addr(void *arg, const uint8_t *macaddr) 942 { 943 xnf_t *xnfp = arg; 944 945 #ifdef XNF_DEBUG 946 if (xnfdebug & XNF_DEBUG_TRACE) 947 printf("xnf%d: set_mac_addr(0x%p): " 948 "%02x:%02x:%02x:%02x:%02x:%02x\n", 949 ddi_get_instance(xnfp->xnf_devinfo), 950 (void *)xnfp, macaddr[0], macaddr[1], macaddr[2], 951 macaddr[3], macaddr[4], macaddr[5]); 952 #endif 953 /* 954 * We can't set our macaddr. 955 * 956 * XXPV dme: Why not? 957 */ 958 return (ENOTSUP); 959 } 960 961 /* 962 * xnf_set_multicast() -- set (enable) or disable a multicast address. 963 * 964 * Program the hardware to enable/disable the multicast address 965 * in "mcast". Enable if "add" is true, disable if false. 966 */ 967 /*ARGSUSED*/ 968 static int 969 xnf_set_multicast(void *arg, boolean_t add, const uint8_t *mca) 970 { 971 xnf_t *xnfp = arg; 972 973 #ifdef XNF_DEBUG 974 if (xnfdebug & XNF_DEBUG_TRACE) 975 printf("xnf%d set_multicast(0x%p): " 976 "%02x:%02x:%02x:%02x:%02x:%02x\n", 977 ddi_get_instance(xnfp->xnf_devinfo), 978 (void *)xnfp, mca[0], mca[1], mca[2], 979 mca[3], mca[4], mca[5]); 980 #endif 981 982 /* 983 * XXPV dme: Ideally we'd relay the address to the backend for 984 * enabling. The protocol doesn't support that (interesting 985 * extension), so we simply succeed and hope that the relevant 986 * packets are going to arrive. 987 * 988 * If protocol support is added for enable/disable then we'll 989 * need to keep a list of those in use and re-add on resume. 990 */ 991 return (0); 992 } 993 994 /* 995 * xnf_set_promiscuous() -- set or reset promiscuous mode on the board 996 * 997 * Program the hardware to enable/disable promiscuous mode. 998 */ 999 /*ARGSUSED*/ 1000 static int 1001 xnf_set_promiscuous(void *arg, boolean_t on) 1002 { 1003 xnf_t *xnfp = arg; 1004 1005 #ifdef XNF_DEBUG 1006 if (xnfdebug & XNF_DEBUG_TRACE) 1007 printf("xnf%d set_promiscuous(0x%p, %x)\n", 1008 ddi_get_instance(xnfp->xnf_devinfo), 1009 (void *)xnfp, on); 1010 #endif 1011 /* 1012 * We can't really do this, but we pretend that we can in 1013 * order that snoop will work. 1014 */ 1015 return (0); 1016 } 1017 1018 /* 1019 * Clean buffers that we have responses for from the transmit ring. 
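 * Called with xnf_txlock held.  Returns the number of free request
 * slots remaining in the transmit ring, which the callers use to
 * decide whether a blocked transmit can be retried, as done from
 * xnf_intr():
 *
 *	mutex_enter(&xnfp->xnf_txlock);
 *	tx_ring_space = xnf_clean_tx_ring(xnfp);
 *	mutex_exit(&xnfp->xnf_txlock);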
1020 */ 1021 static int 1022 xnf_clean_tx_ring(xnf_t *xnfp) 1023 { 1024 RING_IDX next_resp, i; 1025 struct tx_pktinfo *reap; 1026 int id; 1027 grant_ref_t ref; 1028 boolean_t work_to_do; 1029 1030 ASSERT(MUTEX_HELD(&xnfp->xnf_txlock)); 1031 1032 loop: 1033 while (RING_HAS_UNCONSUMED_RESPONSES(&xnfp->xnf_tx_ring)) { 1034 /* 1035 * index of next transmission ack 1036 */ 1037 next_resp = xnfp->xnf_tx_ring.sring->rsp_prod; 1038 membar_consumer(); 1039 /* 1040 * Clean tx packets from ring that we have responses for 1041 */ 1042 for (i = xnfp->xnf_tx_ring.rsp_cons; i != next_resp; i++) { 1043 id = RING_GET_RESPONSE(&xnfp->xnf_tx_ring, i)->id; 1044 reap = &xnfp->xnf_tx_pkt_info[id]; 1045 ref = reap->grant_ref; 1046 /* 1047 * Return id to free list 1048 */ 1049 reap->id = xnfp->xnf_tx_pkt_id_list; 1050 xnfp->xnf_tx_pkt_id_list = id; 1051 if (gnttab_query_foreign_access(ref) != 0) 1052 panic("tx grant still in use " 1053 "by backend domain"); 1054 (void) ddi_dma_unbind_handle(reap->dma_handle); 1055 (void) gnttab_end_foreign_access_ref(ref, 1056 xnfp->xnf_tx_pages_readonly); 1057 gnttab_release_grant_reference(&xnfp->xnf_gref_tx_head, 1058 ref); 1059 freemsg(reap->mp); 1060 reap->mp = NULL; 1061 reap->grant_ref = GRANT_INVALID_REF; 1062 if (reap->bdesc != NULL) 1063 xnf_free_tx_buffer(reap->bdesc); 1064 reap->bdesc = NULL; 1065 } 1066 xnfp->xnf_tx_ring.rsp_cons = next_resp; 1067 membar_enter(); 1068 } 1069 1070 /* LINTED: constant in conditional context */ 1071 RING_FINAL_CHECK_FOR_RESPONSES(&xnfp->xnf_tx_ring, work_to_do); 1072 if (work_to_do) 1073 goto loop; 1074 1075 return (RING_FREE_REQUESTS(&xnfp->xnf_tx_ring)); 1076 } 1077 1078 /* 1079 * If we need to pull up data from either a packet that crosses a page 1080 * boundary or consisting of multiple mblks, do it here. We allocate 1081 * a page aligned buffer and copy the data into it. The header for the 1082 * allocated buffer is returned. (which is also allocated here) 1083 */ 1084 static struct xnf_buffer_desc * 1085 xnf_pullupmsg(xnf_t *xnfp, mblk_t *mp) 1086 { 1087 struct xnf_buffer_desc *bdesc; 1088 mblk_t *mptr; 1089 caddr_t bp; 1090 int len; 1091 1092 /* 1093 * get a xmit buffer from the xmit buffer pool 1094 */ 1095 mutex_enter(&xnfp->xnf_rx_buf_mutex); 1096 bdesc = xnf_get_tx_buffer(xnfp); 1097 mutex_exit(&xnfp->xnf_rx_buf_mutex); 1098 if (bdesc == NULL) 1099 return (bdesc); 1100 /* 1101 * Copy the data into the buffer 1102 */ 1103 xnfp->xnf_stat_tx_pullup++; 1104 bp = bdesc->buf; 1105 for (mptr = mp; mptr != NULL; mptr = mptr->b_cont) { 1106 len = mptr->b_wptr - mptr->b_rptr; 1107 bcopy(mptr->b_rptr, bp, len); 1108 bp += len; 1109 } 1110 return (bdesc); 1111 } 1112 1113 void 1114 xnf_pseudo_cksum(caddr_t buf, int length) 1115 { 1116 struct ether_header *ehp; 1117 uint16_t sap, len, *stuff; 1118 uint32_t cksum; 1119 size_t offset; 1120 ipha_t *ipha; 1121 ipaddr_t src, dst; 1122 1123 ASSERT(length >= sizeof (*ehp)); 1124 ehp = (struct ether_header *)buf; 1125 1126 if (ntohs(ehp->ether_type) == VLAN_TPID) { 1127 struct ether_vlan_header *evhp; 1128 1129 ASSERT(length >= sizeof (*evhp)); 1130 evhp = (struct ether_vlan_header *)buf; 1131 sap = ntohs(evhp->ether_type); 1132 offset = sizeof (*evhp); 1133 } else { 1134 sap = ntohs(ehp->ether_type); 1135 offset = sizeof (*ehp); 1136 } 1137 1138 ASSERT(sap == ETHERTYPE_IP); 1139 1140 /* Packet should have been pulled up by the caller. 
*/ 1141 if ((offset + sizeof (ipha_t)) > length) { 1142 cmn_err(CE_WARN, "xnf_pseudo_cksum: no room for checksum"); 1143 return; 1144 } 1145 1146 ipha = (ipha_t *)(buf + offset); 1147 1148 ASSERT(IPH_HDR_LENGTH(ipha) == IP_SIMPLE_HDR_LENGTH); 1149 1150 len = ntohs(ipha->ipha_length) - IP_SIMPLE_HDR_LENGTH; 1151 1152 switch (ipha->ipha_protocol) { 1153 case IPPROTO_TCP: 1154 stuff = IPH_TCPH_CHECKSUMP(ipha, IP_SIMPLE_HDR_LENGTH); 1155 cksum = IP_TCP_CSUM_COMP; 1156 break; 1157 case IPPROTO_UDP: 1158 stuff = IPH_UDPH_CHECKSUMP(ipha, IP_SIMPLE_HDR_LENGTH); 1159 cksum = IP_UDP_CSUM_COMP; 1160 break; 1161 default: 1162 cmn_err(CE_WARN, "xnf_pseudo_cksum: unexpected protocol %d", 1163 ipha->ipha_protocol); 1164 return; 1165 } 1166 1167 src = ipha->ipha_src; 1168 dst = ipha->ipha_dst; 1169 1170 cksum += (dst >> 16) + (dst & 0xFFFF); 1171 cksum += (src >> 16) + (src & 0xFFFF); 1172 cksum += htons(len); 1173 1174 cksum = (cksum >> 16) + (cksum & 0xFFFF); 1175 cksum = (cksum >> 16) + (cksum & 0xFFFF); 1176 1177 ASSERT(cksum <= 0xFFFF); 1178 1179 *stuff = (uint16_t)(cksum ? cksum : ~cksum); 1180 } 1181 1182 /* 1183 * xnf_send_one() -- send a packet 1184 * 1185 * Called when a packet is ready to be transmitted. A pointer to an 1186 * M_DATA message that contains the packet is passed to this routine. 1187 * At least the complete LLC header is contained in the message's 1188 * first message block, and the remainder of the packet is contained 1189 * within additional M_DATA message blocks linked to the first 1190 * message block. 1191 * 1192 */ 1193 static boolean_t 1194 xnf_send_one(xnf_t *xnfp, mblk_t *mp) 1195 { 1196 struct xnf_buffer_desc *xmitbuf; 1197 struct tx_pktinfo *txp_info; 1198 mblk_t *mptr; 1199 ddi_dma_cookie_t dma_cookie; 1200 RING_IDX slot; 1201 int length = 0, i, pktlen = 0, rc, tx_id; 1202 int tx_ring_freespace, page_oops; 1203 uint_t ncookies; 1204 volatile netif_tx_request_t *txrp; 1205 caddr_t bufaddr; 1206 grant_ref_t ref; 1207 unsigned long mfn; 1208 uint32_t pflags; 1209 domid_t oeid; 1210 1211 #ifdef XNF_DEBUG 1212 if (xnfdebug & XNF_DEBUG_SEND) 1213 printf("xnf%d send(0x%p, 0x%p)\n", 1214 ddi_get_instance(xnfp->xnf_devinfo), 1215 (void *)xnfp, (void *)mp); 1216 #endif 1217 1218 ASSERT(mp != NULL); 1219 ASSERT(mp->b_next == NULL); 1220 ASSERT(MUTEX_HELD(&xnfp->xnf_txlock)); 1221 1222 tx_ring_freespace = xnf_clean_tx_ring(xnfp); 1223 ASSERT(tx_ring_freespace >= 0); 1224 1225 oeid = xvdi_get_oeid(xnfp->xnf_devinfo); 1226 xnfp->xnf_stat_tx_attempt++; 1227 /* 1228 * If there are no xmit ring slots available, return. 
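 * Returning B_FALSE leaves the message on the caller's chain;
 * xnf_send() stops sending and hands the remaining chain back to the
 * MAC layer, which retries after xnf_intr() cleans the ring and
 * calls mac_tx_update().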
1229 */ 1230 if (tx_ring_freespace == 0) { 1231 xnfp->xnf_stat_tx_defer++; 1232 return (B_FALSE); /* Send should be retried */ 1233 } 1234 1235 slot = xnfp->xnf_tx_ring.req_prod_pvt; 1236 /* Count the number of mblks in message and compute packet size */ 1237 for (i = 0, mptr = mp; mptr != NULL; mptr = mptr->b_cont, i++) 1238 pktlen += (mptr->b_wptr - mptr->b_rptr); 1239 1240 /* Make sure packet isn't too large */ 1241 if (pktlen > XNF_FRAMESIZE) { 1242 cmn_err(CE_WARN, "xnf%d: oversized packet (%d bytes) dropped", 1243 ddi_get_instance(xnfp->xnf_devinfo), pktlen); 1244 freemsg(mp); 1245 return (B_TRUE); 1246 } 1247 1248 /* 1249 * Test if we cross a page boundary with our buffer 1250 */ 1251 page_oops = (i == 1) && 1252 (xnf_btop((size_t)mp->b_rptr) != 1253 xnf_btop((size_t)(mp->b_rptr + pktlen))); 1254 /* 1255 * XXPV - unfortunately, the Xen virtual net device currently 1256 * doesn't support multiple packet frags, so this will always 1257 * end up doing the pullup if we got more than one packet. 1258 */ 1259 if (i > xnf_max_tx_frags || page_oops) { 1260 if (page_oops) 1261 xnfp->xnf_stat_tx_pagebndry++; 1262 if ((xmitbuf = xnf_pullupmsg(xnfp, mp)) == NULL) { 1263 /* could not allocate resources? */ 1264 #ifdef XNF_DEBUG 1265 cmn_err(CE_WARN, "xnf%d: pullupmsg failed", 1266 ddi_get_instance(xnfp->xnf_devinfo)); 1267 #endif 1268 xnfp->xnf_stat_tx_defer++; 1269 return (B_FALSE); /* Retry send */ 1270 } 1271 bufaddr = xmitbuf->buf; 1272 } else { 1273 xmitbuf = NULL; 1274 bufaddr = (caddr_t)mp->b_rptr; 1275 } 1276 1277 /* set up data descriptor */ 1278 length = pktlen; 1279 1280 /* 1281 * Get packet id from free list 1282 */ 1283 tx_id = xnfp->xnf_tx_pkt_id_list; 1284 ASSERT(tx_id < NET_TX_RING_SIZE); 1285 txp_info = &xnfp->xnf_tx_pkt_info[tx_id]; 1286 xnfp->xnf_tx_pkt_id_list = txp_info->id; 1287 txp_info->id = tx_id; 1288 1289 /* Prepare for DMA mapping of tx buffer(s) */ 1290 rc = ddi_dma_addr_bind_handle(txp_info->dma_handle, 1291 NULL, bufaddr, length, DDI_DMA_WRITE | DDI_DMA_STREAMING, 1292 DDI_DMA_DONTWAIT, 0, &dma_cookie, &ncookies); 1293 if (rc != DDI_DMA_MAPPED) { 1294 ASSERT(rc != DDI_DMA_INUSE); 1295 ASSERT(rc != DDI_DMA_PARTIAL_MAP); 1296 /* 1297 * Return id to free list 1298 */ 1299 txp_info->id = xnfp->xnf_tx_pkt_id_list; 1300 xnfp->xnf_tx_pkt_id_list = tx_id; 1301 if (rc == DDI_DMA_NORESOURCES) { 1302 xnfp->xnf_stat_tx_defer++; 1303 return (B_FALSE); /* Retry later */ 1304 } 1305 #ifdef XNF_DEBUG 1306 cmn_err(CE_WARN, "xnf%d: bind_handle failed (%x)", 1307 ddi_get_instance(xnfp->xnf_devinfo), rc); 1308 #endif 1309 return (B_FALSE); 1310 } 1311 1312 ASSERT(ncookies == 1); 1313 ref = gnttab_claim_grant_reference(&xnfp->xnf_gref_tx_head); 1314 ASSERT((signed short)ref >= 0); 1315 mfn = xnf_btop(pa_to_ma((paddr_t)dma_cookie.dmac_laddress)); 1316 gnttab_grant_foreign_access_ref(ref, oeid, mfn, 1317 xnfp->xnf_tx_pages_readonly); 1318 txp_info->grant_ref = ref; 1319 txrp = RING_GET_REQUEST(&xnfp->xnf_tx_ring, slot); 1320 txrp->gref = ref; 1321 txrp->size = dma_cookie.dmac_size; 1322 txrp->offset = (uintptr_t)bufaddr & PAGEOFFSET; 1323 txrp->id = tx_id; 1324 txrp->flags = 0; 1325 hcksum_retrieve(mp, NULL, NULL, NULL, NULL, NULL, NULL, &pflags); 1326 if (pflags != 0) { 1327 ASSERT(xnfp->xnf_cksum_offload); 1328 /* 1329 * If the local protocol stack requests checksum 1330 * offload we set the 'checksum blank' flag, 1331 * indicating to the peer that we need the checksum 1332 * calculated for us. 
1333 * 1334 * We _don't_ set the validated flag, because we haven't 1335 * validated that the data and the checksum match. 1336 */ 1337 xnf_pseudo_cksum(bufaddr, length); 1338 txrp->flags |= NETTXF_csum_blank; 1339 xnfp->xnf_stat_tx_cksum_deferred++; 1340 } 1341 membar_producer(); 1342 xnfp->xnf_tx_ring.req_prod_pvt = slot + 1; 1343 1344 txp_info->mp = mp; 1345 txp_info->bdesc = xmitbuf; 1346 1347 xnfp->xnf_stat_opackets++; 1348 xnfp->xnf_stat_obytes += pktlen; 1349 1350 return (B_TRUE); /* successful transmit attempt */ 1351 } 1352 1353 mblk_t * 1354 xnf_send(void *arg, mblk_t *mp) 1355 { 1356 xnf_t *xnfp = arg; 1357 mblk_t *next; 1358 boolean_t sent_something = B_FALSE; 1359 1360 mutex_enter(&xnfp->xnf_txlock); 1361 1362 /* 1363 * Transmission attempts should be impossible without having 1364 * previously called xnf_start(). 1365 */ 1366 ASSERT(xnfp->xnf_running); 1367 1368 /* 1369 * Wait for getting connected to the backend 1370 */ 1371 while (!xnfp->xnf_connected) { 1372 cv_wait(&xnfp->xnf_cv, &xnfp->xnf_txlock); 1373 } 1374 1375 while (mp != NULL) { 1376 next = mp->b_next; 1377 mp->b_next = NULL; 1378 1379 if (!xnf_send_one(xnfp, mp)) { 1380 mp->b_next = next; 1381 break; 1382 } 1383 1384 mp = next; 1385 sent_something = B_TRUE; 1386 } 1387 1388 if (sent_something) { 1389 boolean_t notify; 1390 1391 /* LINTED: constant in conditional context */ 1392 RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&xnfp->xnf_tx_ring, 1393 notify); 1394 if (notify) 1395 ec_notify_via_evtchn(xnfp->xnf_evtchn); 1396 } 1397 1398 mutex_exit(&xnfp->xnf_txlock); 1399 1400 return (mp); 1401 } 1402 1403 /* 1404 * xnf_intr() -- ring interrupt service routine 1405 */ 1406 static uint_t 1407 xnf_intr(caddr_t arg) 1408 { 1409 xnf_t *xnfp = (xnf_t *)arg; 1410 int tx_ring_space; 1411 1412 mutex_enter(&xnfp->xnf_intrlock); 1413 1414 /* spurious intr */ 1415 if (!xnfp->xnf_connected) { 1416 mutex_exit(&xnfp->xnf_intrlock); 1417 xnfp->xnf_stat_unclaimed_interrupts++; 1418 return (DDI_INTR_UNCLAIMED); 1419 } 1420 1421 #ifdef XNF_DEBUG 1422 if (xnfdebug & XNF_DEBUG_INT) 1423 printf("xnf%d intr(0x%p)\n", 1424 ddi_get_instance(xnfp->xnf_devinfo), (void *)xnfp); 1425 #endif 1426 if (RING_HAS_UNCONSUMED_RESPONSES(&xnfp->xnf_rx_ring)) { 1427 mblk_t *mp; 1428 1429 if (xnfp->xnf_rx_hvcopy) 1430 mp = xnf_process_hvcopy_recv(xnfp); 1431 else 1432 mp = xnf_process_recv(xnfp); 1433 1434 if (mp != NULL) 1435 mac_rx(xnfp->xnf_mh, xnfp->xnf_rx_handle, mp); 1436 } 1437 1438 /* 1439 * Clean tx ring and try to start any blocked xmit streams if 1440 * there is now some space. 1441 */ 1442 mutex_enter(&xnfp->xnf_txlock); 1443 tx_ring_space = xnf_clean_tx_ring(xnfp); 1444 mutex_exit(&xnfp->xnf_txlock); 1445 if (tx_ring_space > XNF_TX_FREE_THRESH) { 1446 mutex_exit(&xnfp->xnf_intrlock); 1447 mac_tx_update(xnfp->xnf_mh); 1448 mutex_enter(&xnfp->xnf_intrlock); 1449 } 1450 1451 xnfp->xnf_stat_interrupts++; 1452 mutex_exit(&xnfp->xnf_intrlock); 1453 return (DDI_INTR_CLAIMED); /* indicate that the interrupt was for us */ 1454 } 1455 1456 /* 1457 * xnf_start() -- start the board receiving and enable interrupts. 1458 */ 1459 static int 1460 xnf_start(void *arg) 1461 { 1462 xnf_t *xnfp = arg; 1463 1464 #ifdef XNF_DEBUG 1465 if (xnfdebug & XNF_DEBUG_TRACE) 1466 printf("xnf%d start(0x%p)\n", 1467 ddi_get_instance(xnfp->xnf_devinfo), (void *)xnfp); 1468 #endif 1469 1470 mutex_enter(&xnfp->xnf_intrlock); 1471 mutex_enter(&xnfp->xnf_txlock); 1472 1473 /* Accept packets from above. 
*/ 1474 xnfp->xnf_running = B_TRUE; 1475 1476 mutex_exit(&xnfp->xnf_txlock); 1477 mutex_exit(&xnfp->xnf_intrlock); 1478 1479 return (0); 1480 } 1481 1482 /* xnf_stop() - disable hardware */ 1483 static void 1484 xnf_stop(void *arg) 1485 { 1486 xnf_t *xnfp = arg; 1487 1488 #ifdef XNF_DEBUG 1489 if (xnfdebug & XNF_DEBUG_TRACE) 1490 printf("xnf%d stop(0x%p)\n", 1491 ddi_get_instance(xnfp->xnf_devinfo), (void *)xnfp); 1492 #endif 1493 1494 mutex_enter(&xnfp->xnf_intrlock); 1495 mutex_enter(&xnfp->xnf_txlock); 1496 1497 xnfp->xnf_running = B_FALSE; 1498 1499 mutex_exit(&xnfp->xnf_txlock); 1500 mutex_exit(&xnfp->xnf_intrlock); 1501 } 1502 1503 /* 1504 * Driver private functions follow 1505 */ 1506 1507 /* 1508 * Hang buffer on rx ring 1509 */ 1510 static void 1511 rx_buffer_hang(xnf_t *xnfp, struct xnf_buffer_desc *bdesc) 1512 { 1513 volatile netif_rx_request_t *reqp; 1514 RING_IDX hang_ix; 1515 grant_ref_t ref; 1516 domid_t oeid; 1517 1518 oeid = xvdi_get_oeid(xnfp->xnf_devinfo); 1519 1520 ASSERT(MUTEX_HELD(&xnfp->xnf_intrlock)); 1521 reqp = RING_GET_REQUEST(&xnfp->xnf_rx_ring, 1522 xnfp->xnf_rx_ring.req_prod_pvt); 1523 hang_ix = (RING_IDX) (reqp - RING_GET_REQUEST(&xnfp->xnf_rx_ring, 0)); 1524 ASSERT(xnfp->xnf_rxpkt_bufptr[hang_ix] == NULL); 1525 if (bdesc->grant_ref == GRANT_INVALID_REF) { 1526 ref = gnttab_claim_grant_reference(&xnfp->xnf_gref_rx_head); 1527 ASSERT((signed short)ref >= 0); 1528 bdesc->grant_ref = ref; 1529 if (xnfp->xnf_rx_hvcopy) { 1530 pfn_t pfn = xnf_btop(bdesc->buf_phys); 1531 mfn_t mfn = pfn_to_mfn(pfn); 1532 1533 gnttab_grant_foreign_access_ref(ref, oeid, mfn, 0); 1534 } else { 1535 gnttab_grant_foreign_transfer_ref(ref, oeid, 0); 1536 } 1537 } 1538 reqp->id = hang_ix; 1539 reqp->gref = bdesc->grant_ref; 1540 bdesc->id = hang_ix; 1541 xnfp->xnf_rxpkt_bufptr[hang_ix] = bdesc; 1542 membar_producer(); 1543 xnfp->xnf_rx_ring.req_prod_pvt++; 1544 } 1545 1546 static mblk_t * 1547 xnf_process_hvcopy_recv(xnf_t *xnfp) 1548 { 1549 netif_rx_response_t *rxpkt; 1550 mblk_t *mp, *head, *tail; 1551 struct xnf_buffer_desc *bdesc; 1552 boolean_t hwcsum = B_FALSE, notify, work_to_do; 1553 size_t len; 1554 1555 /* 1556 * in loop over unconsumed responses, we do: 1557 * 1. get a response 1558 * 2. take corresponding buffer off recv. ring 1559 * 3. indicate this by setting slot to NULL 1560 * 4. create a new message and 1561 * 5. copy data in, adjust ptr 1562 * 1563 * outside loop: 1564 * 7. make sure no more data has arrived; kick HV 1565 */ 1566 1567 head = tail = NULL; 1568 1569 loop: 1570 while (RING_HAS_UNCONSUMED_RESPONSES(&xnfp->xnf_rx_ring)) { 1571 1572 /* 1. */ 1573 rxpkt = RING_GET_RESPONSE(&xnfp->xnf_rx_ring, 1574 xnfp->xnf_rx_ring.rsp_cons); 1575 1576 DTRACE_PROBE4(got_PKT, int, (int)rxpkt->id, int, 1577 (int)rxpkt->offset, 1578 int, (int)rxpkt->flags, int, (int)rxpkt->status); 1579 1580 /* 1581 * 2. 
1582 * Take buffer off of receive ring 1583 */ 1584 hwcsum = B_FALSE; 1585 bdesc = xnfp->xnf_rxpkt_bufptr[rxpkt->id]; 1586 /* 3 */ 1587 xnfp->xnf_rxpkt_bufptr[rxpkt->id] = NULL; 1588 ASSERT(bdesc->id == rxpkt->id); 1589 mp = NULL; 1590 if (!xnfp->xnf_running) { 1591 DTRACE_PROBE4(pkt_dropped, int, rxpkt->status, 1592 char *, bdesc->buf, int, rxpkt->offset, 1593 char *, ((char *)bdesc->buf) + rxpkt->offset); 1594 xnfp->xnf_stat_drop++; 1595 /* 1596 * re-hang the buffer 1597 */ 1598 rx_buffer_hang(xnfp, bdesc); 1599 } else if (rxpkt->status <= 0) { 1600 DTRACE_PROBE4(pkt_status_negative, int, rxpkt->status, 1601 char *, bdesc->buf, int, rxpkt->offset, 1602 char *, ((char *)bdesc->buf) + rxpkt->offset); 1603 xnfp->xnf_stat_errrx++; 1604 if (rxpkt->status == 0) 1605 xnfp->xnf_stat_runt++; 1606 if (rxpkt->status == NETIF_RSP_ERROR) 1607 xnfp->xnf_stat_mac_rcv_error++; 1608 if (rxpkt->status == NETIF_RSP_DROPPED) 1609 xnfp->xnf_stat_norxbuf++; 1610 /* 1611 * re-hang the buffer 1612 */ 1613 rx_buffer_hang(xnfp, bdesc); 1614 } else { 1615 grant_ref_t ref = bdesc->grant_ref; 1616 struct xnf_buffer_desc *new_bdesc; 1617 unsigned long off = rxpkt->offset; 1618 1619 DTRACE_PROBE4(pkt_status_ok, int, rxpkt->status, 1620 char *, bdesc->buf, int, rxpkt->offset, 1621 char *, ((char *)bdesc->buf) + rxpkt->offset); 1622 len = rxpkt->status; 1623 ASSERT(off + len <= PAGEOFFSET); 1624 if (ref == GRANT_INVALID_REF) { 1625 mp = NULL; 1626 new_bdesc = bdesc; 1627 cmn_err(CE_WARN, "Bad rx grant reference %d " 1628 "from dom %d", ref, 1629 xvdi_get_oeid(xnfp->xnf_devinfo)); 1630 goto luckless; 1631 } 1632 /* 1633 * Release ref which we'll be re-claiming in 1634 * rx_buffer_hang(). 1635 */ 1636 bdesc->grant_ref = GRANT_INVALID_REF; 1637 (void) gnttab_end_foreign_access_ref(ref, 0); 1638 gnttab_release_grant_reference(&xnfp->xnf_gref_rx_head, 1639 ref); 1640 if (rxpkt->flags & NETRXF_data_validated) 1641 hwcsum = B_TRUE; 1642 1643 /* 1644 * XXPV for the initial implementation of HVcopy, 1645 * create a new msg and copy in the data 1646 */ 1647 /* 4. */ 1648 if ((mp = allocb(len, BPRI_MED)) == NULL) { 1649 /* 1650 * Couldn't get buffer to copy to, 1651 * drop this data, and re-hang 1652 * the buffer on the ring. 1653 */ 1654 xnfp->xnf_stat_norxbuf++; 1655 DTRACE_PROBE(alloc_nix); 1656 } else { 1657 /* 5. */ 1658 DTRACE_PROBE(alloc_ok); 1659 bcopy(bdesc->buf + off, mp->b_wptr, 1660 len); 1661 mp->b_wptr += len; 1662 } 1663 new_bdesc = bdesc; 1664 luckless: 1665 1666 /* Re-hang old or hang new buffer. */ 1667 rx_buffer_hang(xnfp, new_bdesc); 1668 } 1669 if (mp) { 1670 if (hwcsum) { 1671 /* 1672 * See comments in xnf_process_recv(). 1673 */ 1674 1675 (void) hcksum_assoc(mp, NULL, 1676 NULL, 0, 0, 0, 0, 1677 HCK_FULLCKSUM | 1678 HCK_FULLCKSUM_OK, 1679 0); 1680 xnfp->xnf_stat_rx_cksum_no_need++; 1681 } 1682 if (head == NULL) { 1683 head = tail = mp; 1684 } else { 1685 tail->b_next = mp; 1686 tail = mp; 1687 } 1688 1689 ASSERT(mp->b_next == NULL); 1690 1691 xnfp->xnf_stat_ipackets++; 1692 xnfp->xnf_stat_rbytes += len; 1693 } 1694 1695 xnfp->xnf_rx_ring.rsp_cons++; 1696 1697 xnfp->xnf_stat_hvcopy_packet_processed++; 1698 } 1699 1700 /* 7. */ 1701 /* 1702 * Has more data come in since we started? 1703 */ 1704 /* LINTED: constant in conditional context */ 1705 RING_FINAL_CHECK_FOR_RESPONSES(&xnfp->xnf_rx_ring, work_to_do); 1706 if (work_to_do) 1707 goto loop; 1708 1709 /* 1710 * Indicate to the backend that we have re-filled the receive 1711 * ring. 
1712 */ 1713 /* LINTED: constant in conditional context */ 1714 RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&xnfp->xnf_rx_ring, notify); 1715 if (notify) 1716 ec_notify_via_evtchn(xnfp->xnf_evtchn); 1717 1718 return (head); 1719 } 1720 1721 /* Process all queued received packets */ 1722 static mblk_t * 1723 xnf_process_recv(xnf_t *xnfp) 1724 { 1725 volatile netif_rx_response_t *rxpkt; 1726 mblk_t *mp, *head, *tail; 1727 struct xnf_buffer_desc *bdesc; 1728 extern mblk_t *desballoc(unsigned char *, size_t, uint_t, frtn_t *); 1729 boolean_t hwcsum = B_FALSE, notify, work_to_do; 1730 size_t len; 1731 pfn_t pfn; 1732 long cnt; 1733 1734 head = tail = NULL; 1735 loop: 1736 while (RING_HAS_UNCONSUMED_RESPONSES(&xnfp->xnf_rx_ring)) { 1737 1738 rxpkt = RING_GET_RESPONSE(&xnfp->xnf_rx_ring, 1739 xnfp->xnf_rx_ring.rsp_cons); 1740 1741 /* 1742 * Take buffer off of receive ring 1743 */ 1744 hwcsum = B_FALSE; 1745 bdesc = xnfp->xnf_rxpkt_bufptr[rxpkt->id]; 1746 xnfp->xnf_rxpkt_bufptr[rxpkt->id] = NULL; 1747 ASSERT(bdesc->id == rxpkt->id); 1748 mp = NULL; 1749 if (!xnfp->xnf_running) { 1750 xnfp->xnf_stat_drop++; 1751 /* 1752 * re-hang the buffer 1753 */ 1754 rx_buffer_hang(xnfp, bdesc); 1755 } else if (rxpkt->status <= 0) { 1756 xnfp->xnf_stat_errrx++; 1757 if (rxpkt->status == 0) 1758 xnfp->xnf_stat_runt++; 1759 if (rxpkt->status == NETIF_RSP_ERROR) 1760 xnfp->xnf_stat_mac_rcv_error++; 1761 if (rxpkt->status == NETIF_RSP_DROPPED) 1762 xnfp->xnf_stat_norxbuf++; 1763 /* 1764 * re-hang the buffer 1765 */ 1766 rx_buffer_hang(xnfp, bdesc); 1767 } else { 1768 grant_ref_t ref = bdesc->grant_ref; 1769 struct xnf_buffer_desc *new_bdesc; 1770 unsigned long off = rxpkt->offset; 1771 unsigned long mfn; 1772 1773 len = rxpkt->status; 1774 ASSERT(off + len <= PAGEOFFSET); 1775 if (ref == GRANT_INVALID_REF) { 1776 mp = NULL; 1777 new_bdesc = bdesc; 1778 cmn_err(CE_WARN, "Bad rx grant reference %d " 1779 "from dom %d", ref, 1780 xvdi_get_oeid(xnfp->xnf_devinfo)); 1781 goto luckless; 1782 } 1783 bdesc->grant_ref = GRANT_INVALID_REF; 1784 mfn = gnttab_end_foreign_transfer_ref(ref); 1785 ASSERT(mfn != MFN_INVALID); 1786 ASSERT(hat_getpfnum(kas.a_hat, bdesc->buf) == 1787 PFN_INVALID); 1788 1789 gnttab_release_grant_reference(&xnfp->xnf_gref_rx_head, 1790 ref); 1791 reassign_pfn(xnf_btop(bdesc->buf_phys), mfn); 1792 hat_devload(kas.a_hat, bdesc->buf, PAGESIZE, 1793 xnf_btop(bdesc->buf_phys), 1794 PROT_READ | PROT_WRITE, HAT_LOAD); 1795 balloon_drv_added(1); 1796 1797 if (rxpkt->flags & NETRXF_data_validated) 1798 hwcsum = B_TRUE; 1799 if (len <= xnf_rx_bcopy_thresh) { 1800 /* 1801 * For small buffers, just copy the data 1802 * and send the copy upstream. 1803 */ 1804 new_bdesc = NULL; 1805 } else { 1806 /* 1807 * We send a pointer to this data upstream; 1808 * we need a new buffer to replace this one. 1809 */ 1810 mutex_enter(&xnfp->xnf_rx_buf_mutex); 1811 new_bdesc = xnf_get_buffer(xnfp); 1812 if (new_bdesc != NULL) { 1813 xnfp->xnf_rx_bufs_outstanding++; 1814 } else { 1815 xnfp->xnf_stat_rx_no_ringbuf++; 1816 } 1817 mutex_exit(&xnfp->xnf_rx_buf_mutex); 1818 } 1819 1820 if (new_bdesc == NULL) { 1821 /* 1822 * Don't have a new ring buffer; bcopy the data 1823 * from the buffer, and preserve the 1824 * original buffer 1825 */ 1826 if ((mp = allocb(len, BPRI_MED)) == NULL) { 1827 /* 1828 * Could't get buffer to copy to, 1829 * drop this data, and re-hang 1830 * the buffer on the ring. 
1831 */ 1832 xnfp->xnf_stat_norxbuf++; 1833 } else { 1834 bcopy(bdesc->buf + off, mp->b_wptr, 1835 len); 1836 } 1837 /* 1838 * Give the buffer page back to xen 1839 */ 1840 pfn = xnf_btop(bdesc->buf_phys); 1841 cnt = balloon_free_pages(1, &mfn, bdesc->buf, 1842 &pfn); 1843 if (cnt != 1) { 1844 cmn_err(CE_WARN, "unable to give a " 1845 "page back to the hypervisor\n"); 1846 } 1847 new_bdesc = bdesc; 1848 } else { 1849 if ((mp = desballoc((unsigned char *)bdesc->buf, 1850 off + len, 0, (frtn_t *)bdesc)) == NULL) { 1851 /* 1852 * Couldn't get mblk to pass recv data 1853 * up with, free the old ring buffer 1854 */ 1855 xnfp->xnf_stat_norxbuf++; 1856 xnf_rcv_complete(bdesc); 1857 goto luckless; 1858 } 1859 (void) ddi_dma_sync(bdesc->dma_handle, 1860 0, 0, DDI_DMA_SYNC_FORCPU); 1861 1862 mp->b_wptr += off; 1863 mp->b_rptr += off; 1864 } 1865 luckless: 1866 if (mp) 1867 mp->b_wptr += len; 1868 /* re-hang old or hang new buffer */ 1869 rx_buffer_hang(xnfp, new_bdesc); 1870 } 1871 if (mp) { 1872 if (hwcsum) { 1873 /* 1874 * If the peer says that the data has 1875 * been validated then we declare that 1876 * the full checksum has been 1877 * verified. 1878 * 1879 * We don't look at the "checksum 1880 * blank" flag, and hence could have a 1881 * packet here that we are asserting 1882 * is good with a blank checksum. 1883 * 1884 * The hardware checksum offload 1885 * specification says that we must 1886 * provide the actual checksum as well 1887 * as an assertion that it is valid, 1888 * but the protocol stack doesn't 1889 * actually use it and some other 1890 * drivers don't bother, so we don't. 1891 * If it was necessary we could grovel 1892 * in the packet to find it. 1893 */ 1894 1895 (void) hcksum_assoc(mp, NULL, 1896 NULL, 0, 0, 0, 0, 1897 HCK_FULLCKSUM | 1898 HCK_FULLCKSUM_OK, 1899 0); 1900 xnfp->xnf_stat_rx_cksum_no_need++; 1901 } 1902 if (head == NULL) { 1903 head = tail = mp; 1904 } else { 1905 tail->b_next = mp; 1906 tail = mp; 1907 } 1908 1909 ASSERT(mp->b_next == NULL); 1910 1911 xnfp->xnf_stat_ipackets++; 1912 xnfp->xnf_stat_rbytes += len; 1913 } 1914 1915 xnfp->xnf_rx_ring.rsp_cons++; 1916 } 1917 1918 /* 1919 * Has more data come in since we started? 1920 */ 1921 /* LINTED: constant in conditional context */ 1922 RING_FINAL_CHECK_FOR_RESPONSES(&xnfp->xnf_rx_ring, work_to_do); 1923 if (work_to_do) 1924 goto loop; 1925 1926 /* 1927 * Indicate to the backend that we have re-filled the receive 1928 * ring. 1929 */ 1930 /* LINTED: constant in conditional context */ 1931 RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&xnfp->xnf_rx_ring, notify); 1932 if (notify) 1933 ec_notify_via_evtchn(xnfp->xnf_evtchn); 1934 1935 return (head); 1936 } 1937 1938 /* Called when the upper layers free a message we passed upstream */ 1939 static void 1940 xnf_rcv_complete(struct xnf_buffer_desc *bdesc) 1941 { 1942 xnf_t *xnfp = bdesc->xnfp; 1943 pfn_t pfn; 1944 long cnt; 1945 1946 /* One less outstanding receive buffer */ 1947 mutex_enter(&xnfp->xnf_rx_buf_mutex); 1948 --xnfp->xnf_rx_bufs_outstanding; 1949 /* 1950 * Return buffer to the free list, unless the free list is getting 1951 * too large. XXPV - this threshold may need tuning. 1952 */ 1953 if (xnfp->xnf_rx_descs_free < xnf_rx_bufs_lowat) { 1954 /* 1955 * Unmap the page, and hand the machine page back 1956 * to xen so it can be re-used as a backend net buffer. 
1957 */ 1958 pfn = xnf_btop(bdesc->buf_phys); 1959 cnt = balloon_free_pages(1, NULL, bdesc->buf, &pfn); 1960 if (cnt != 1) { 1961 cmn_err(CE_WARN, "unable to give a page back to the " 1962 "hypervisor\n"); 1963 } 1964 1965 bdesc->next = xnfp->xnf_free_list; 1966 xnfp->xnf_free_list = bdesc; 1967 xnfp->xnf_rx_descs_free++; 1968 mutex_exit(&xnfp->xnf_rx_buf_mutex); 1969 } else { 1970 /* 1971 * We can return everything here since we have a free buffer 1972 * that we have not given the backing page for back to xen. 1973 */ 1974 --xnfp->xnf_rx_buffer_count; 1975 mutex_exit(&xnfp->xnf_rx_buf_mutex); 1976 (void) ddi_dma_unbind_handle(bdesc->dma_handle); 1977 ddi_dma_mem_free(&bdesc->acc_handle); 1978 ddi_dma_free_handle(&bdesc->dma_handle); 1979 kmem_free(bdesc, sizeof (*bdesc)); 1980 } 1981 } 1982 1983 /* 1984 * xnf_alloc_dma_resources() -- initialize the drivers structures 1985 */ 1986 static int 1987 xnf_alloc_dma_resources(xnf_t *xnfp) 1988 { 1989 dev_info_t *devinfo = xnfp->xnf_devinfo; 1990 int i; 1991 size_t len; 1992 ddi_dma_cookie_t dma_cookie; 1993 uint_t ncookies; 1994 struct xnf_buffer_desc *bdesc; 1995 int rc; 1996 caddr_t rptr; 1997 1998 xnfp->xnf_n_rx = NET_RX_RING_SIZE; 1999 xnfp->xnf_max_rx_bufs = xnf_rx_bufs_hiwat; 2000 2001 xnfp->xnf_n_tx = NET_TX_RING_SIZE; 2002 2003 /* 2004 * The code below allocates all the DMA data structures that 2005 * need to be released when the driver is detached. 2006 * 2007 * First allocate handles for mapping (virtual address) pointers to 2008 * transmit data buffers to physical addresses 2009 */ 2010 for (i = 0; i < xnfp->xnf_n_tx; i++) { 2011 if ((rc = ddi_dma_alloc_handle(devinfo, 2012 &tx_buffer_dma_attr, DDI_DMA_SLEEP, 0, 2013 &xnfp->xnf_tx_pkt_info[i].dma_handle)) != DDI_SUCCESS) 2014 return (DDI_FAILURE); 2015 } 2016 2017 /* 2018 * Allocate page for the transmit descriptor ring. 2019 */ 2020 if (ddi_dma_alloc_handle(devinfo, &ringbuf_dma_attr, 2021 DDI_DMA_SLEEP, 0, &xnfp->xnf_tx_ring_dma_handle) != DDI_SUCCESS) 2022 goto alloc_error; 2023 2024 if (ddi_dma_mem_alloc(xnfp->xnf_tx_ring_dma_handle, 2025 PAGESIZE, &accattr, DDI_DMA_CONSISTENT, 2026 DDI_DMA_SLEEP, 0, &rptr, &len, 2027 &xnfp->xnf_tx_ring_dma_acchandle) != DDI_SUCCESS) { 2028 ddi_dma_free_handle(&xnfp->xnf_tx_ring_dma_handle); 2029 xnfp->xnf_tx_ring_dma_handle = NULL; 2030 goto alloc_error; 2031 } 2032 2033 if ((rc = ddi_dma_addr_bind_handle(xnfp->xnf_tx_ring_dma_handle, NULL, 2034 rptr, PAGESIZE, DDI_DMA_RDWR | DDI_DMA_CONSISTENT, 2035 DDI_DMA_SLEEP, 0, &dma_cookie, &ncookies)) != DDI_DMA_MAPPED) { 2036 ddi_dma_mem_free(&xnfp->xnf_tx_ring_dma_acchandle); 2037 ddi_dma_free_handle(&xnfp->xnf_tx_ring_dma_handle); 2038 xnfp->xnf_tx_ring_dma_handle = NULL; 2039 xnfp->xnf_tx_ring_dma_acchandle = NULL; 2040 if (rc == DDI_DMA_NORESOURCES) 2041 goto alloc_error; 2042 else 2043 goto error; 2044 } 2045 2046 ASSERT(ncookies == 1); 2047 bzero(rptr, PAGESIZE); 2048 /* LINTED: constant in conditional context */ 2049 SHARED_RING_INIT((netif_tx_sring_t *)rptr); 2050 /* LINTED: constant in conditional context */ 2051 FRONT_RING_INIT(&xnfp->xnf_tx_ring, (netif_tx_sring_t *)rptr, PAGESIZE); 2052 xnfp->xnf_tx_ring_phys_addr = dma_cookie.dmac_laddress; 2053 2054 /* 2055 * Allocate page for the receive descriptor ring. 
2056 */ 2057 if (ddi_dma_alloc_handle(devinfo, &ringbuf_dma_attr, 2058 DDI_DMA_SLEEP, 0, &xnfp->xnf_rx_ring_dma_handle) != DDI_SUCCESS) 2059 goto alloc_error; 2060 2061 if (ddi_dma_mem_alloc(xnfp->xnf_rx_ring_dma_handle, 2062 PAGESIZE, &accattr, DDI_DMA_CONSISTENT, 2063 DDI_DMA_SLEEP, 0, &rptr, &len, 2064 &xnfp->xnf_rx_ring_dma_acchandle) != DDI_SUCCESS) { 2065 ddi_dma_free_handle(&xnfp->xnf_rx_ring_dma_handle); 2066 xnfp->xnf_rx_ring_dma_handle = NULL; 2067 goto alloc_error; 2068 } 2069 2070 if ((rc = ddi_dma_addr_bind_handle(xnfp->xnf_rx_ring_dma_handle, NULL, 2071 rptr, PAGESIZE, DDI_DMA_RDWR | DDI_DMA_CONSISTENT, 2072 DDI_DMA_SLEEP, 0, &dma_cookie, &ncookies)) != DDI_DMA_MAPPED) { 2073 ddi_dma_mem_free(&xnfp->xnf_rx_ring_dma_acchandle); 2074 ddi_dma_free_handle(&xnfp->xnf_rx_ring_dma_handle); 2075 xnfp->xnf_rx_ring_dma_handle = NULL; 2076 xnfp->xnf_rx_ring_dma_acchandle = NULL; 2077 if (rc == DDI_DMA_NORESOURCES) 2078 goto alloc_error; 2079 else 2080 goto error; 2081 } 2082 2083 ASSERT(ncookies == 1); 2084 bzero(rptr, PAGESIZE); 2085 /* LINTED: constant in conditional context */ 2086 SHARED_RING_INIT((netif_rx_sring_t *)rptr); 2087 /* LINTED: constant in conditional context */ 2088 FRONT_RING_INIT(&xnfp->xnf_rx_ring, (netif_rx_sring_t *)rptr, PAGESIZE); 2089 xnfp->xnf_rx_ring_phys_addr = dma_cookie.dmac_laddress; 2090 2091 /* 2092 * Preallocate receive buffers for each receive descriptor. 2093 */ 2094 2095 /* Set up the "free list" of receive buffer descriptors */ 2096 for (i = 0; i < xnfp->xnf_n_rx; i++) { 2097 if ((bdesc = xnf_alloc_buffer(xnfp)) == NULL) 2098 goto alloc_error; 2099 bdesc->next = xnfp->xnf_free_list; 2100 xnfp->xnf_free_list = bdesc; 2101 } 2102 2103 return (DDI_SUCCESS); 2104 2105 alloc_error: 2106 cmn_err(CE_WARN, "xnf%d: could not allocate enough DMA memory", 2107 ddi_get_instance(xnfp->xnf_devinfo)); 2108 error: 2109 xnf_release_dma_resources(xnfp); 2110 return (DDI_FAILURE); 2111 } 2112 2113 /* 2114 * Release all DMA resources in the opposite order from acquisition 2115 * Should not be called until all outstanding esballoc buffers 2116 * have been returned. 
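 * (when those buffers are eventually freed upstream, their free routines still reference the driver soft state and per-buffer DMA resources).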
2117 */ 2118 static void 2119 xnf_release_dma_resources(xnf_t *xnfp) 2120 { 2121 int i; 2122 2123 /* 2124 * Free receive buffers which are currently associated with 2125 * descriptors 2126 */ 2127 for (i = 0; i < xnfp->xnf_n_rx; i++) { 2128 struct xnf_buffer_desc *bp; 2129 2130 if ((bp = xnfp->xnf_rxpkt_bufptr[i]) == NULL) 2131 continue; 2132 xnf_free_buffer(bp); 2133 xnfp->xnf_rxpkt_bufptr[i] = NULL; 2134 } 2135 2136 /* Free the receive ring buffer */ 2137 if (xnfp->xnf_rx_ring_dma_acchandle != NULL) { 2138 (void) ddi_dma_unbind_handle(xnfp->xnf_rx_ring_dma_handle); 2139 ddi_dma_mem_free(&xnfp->xnf_rx_ring_dma_acchandle); 2140 ddi_dma_free_handle(&xnfp->xnf_rx_ring_dma_handle); 2141 xnfp->xnf_rx_ring_dma_acchandle = NULL; 2142 } 2143 /* Free the transmit ring buffer */ 2144 if (xnfp->xnf_tx_ring_dma_acchandle != NULL) { 2145 (void) ddi_dma_unbind_handle(xnfp->xnf_tx_ring_dma_handle); 2146 ddi_dma_mem_free(&xnfp->xnf_tx_ring_dma_acchandle); 2147 ddi_dma_free_handle(&xnfp->xnf_tx_ring_dma_handle); 2148 xnfp->xnf_tx_ring_dma_acchandle = NULL; 2149 } 2150 2151 /* 2152 * Free handles for mapping (virtual address) pointers to 2153 * transmit data buffers to physical addresses 2154 */ 2155 for (i = 0; i < xnfp->xnf_n_tx; i++) { 2156 if (xnfp->xnf_tx_pkt_info[i].dma_handle != NULL) { 2157 ddi_dma_free_handle( 2158 &xnfp->xnf_tx_pkt_info[i].dma_handle); 2159 } 2160 } 2161 2162 } 2163 2164 static void 2165 xnf_release_mblks(xnf_t *xnfp) 2166 { 2167 int i; 2168 2169 for (i = 0; i < xnfp->xnf_n_tx; i++) { 2170 if (xnfp->xnf_tx_pkt_info[i].mp == NULL) 2171 continue; 2172 freemsg(xnfp->xnf_tx_pkt_info[i].mp); 2173 xnfp->xnf_tx_pkt_info[i].mp = NULL; 2174 (void) ddi_dma_unbind_handle( 2175 xnfp->xnf_tx_pkt_info[i].dma_handle); 2176 } 2177 } 2178 2179 /* 2180 * Remove a xmit buffer descriptor from the head of the free list and return 2181 * a pointer to it. If no buffers on list, attempt to allocate a new one. 2182 * Called with the tx_buf_mutex held. 2183 */ 2184 static struct xnf_buffer_desc * 2185 xnf_get_tx_buffer(xnf_t *xnfp) 2186 { 2187 struct xnf_buffer_desc *bdesc; 2188 2189 bdesc = xnfp->xnf_tx_free_list; 2190 if (bdesc != NULL) { 2191 xnfp->xnf_tx_free_list = bdesc->next; 2192 } else { 2193 bdesc = xnf_alloc_tx_buffer(xnfp); 2194 } 2195 return (bdesc); 2196 } 2197 2198 /* 2199 * Remove a buffer descriptor from the head of the free list and return 2200 * a pointer to it. If no buffers on list, attempt to allocate a new one. 2201 * Called with the rx_buf_mutex held. 2202 */ 2203 static struct xnf_buffer_desc * 2204 xnf_get_buffer(xnf_t *xnfp) 2205 { 2206 struct xnf_buffer_desc *bdesc; 2207 2208 bdesc = xnfp->xnf_free_list; 2209 if (bdesc != NULL) { 2210 xnfp->xnf_free_list = bdesc->next; 2211 xnfp->xnf_rx_descs_free--; 2212 } else { 2213 bdesc = xnf_alloc_buffer(xnfp); 2214 } 2215 return (bdesc); 2216 } 2217 2218 /* 2219 * Free a xmit buffer back to the xmit free list 2220 */ 2221 static void 2222 xnf_free_tx_buffer(struct xnf_buffer_desc *bp) 2223 { 2224 xnf_t *xnfp = bp->xnfp; 2225 2226 mutex_enter(&xnfp->xnf_tx_buf_mutex); 2227 bp->next = xnfp->xnf_tx_free_list; 2228 xnfp->xnf_tx_free_list = bp; 2229 mutex_exit(&xnfp->xnf_tx_buf_mutex); 2230 } 2231 2232 /* 2233 * Put a buffer descriptor onto the head of the free list. 2234 * for page-flip: 2235 * We can't really free these buffers back to the kernel 2236 * since we have given away their backing page to be used 2237 * by the back end net driver. 
2238 * for hvcopy: 2239 * release all the memory 2240 */ 2241 static void 2242 xnf_free_buffer(struct xnf_buffer_desc *bdesc) 2243 { 2244 xnf_t *xnfp = bdesc->xnfp; 2245 2246 mutex_enter(&xnfp->xnf_rx_buf_mutex); 2247 if (xnfp->xnf_rx_hvcopy) { 2248 if (ddi_dma_unbind_handle(bdesc->dma_handle) != DDI_SUCCESS) 2249 goto out; 2250 ddi_dma_mem_free(&bdesc->acc_handle); 2251 ddi_dma_free_handle(&bdesc->dma_handle); 2252 kmem_free(bdesc, sizeof (*bdesc)); 2253 xnfp->xnf_rx_buffer_count--; 2254 } else { 2255 bdesc->next = xnfp->xnf_free_list; 2256 xnfp->xnf_free_list = bdesc; 2257 xnfp->xnf_rx_descs_free++; 2258 } 2259 out: 2260 mutex_exit(&xnfp->xnf_rx_buf_mutex); 2261 } 2262 2263 /* 2264 * Allocate a DMA-able xmit buffer, including a structure to 2265 * keep track of the buffer. Called with tx_buf_mutex held. 2266 */ 2267 static struct xnf_buffer_desc * 2268 xnf_alloc_tx_buffer(xnf_t *xnfp) 2269 { 2270 struct xnf_buffer_desc *bdesc; 2271 size_t len; 2272 2273 if ((bdesc = kmem_zalloc(sizeof (*bdesc), KM_NOSLEEP)) == NULL) 2274 return (NULL); 2275 2276 /* allocate a DMA access handle for the transmit buffer */ 2277 if (ddi_dma_alloc_handle(xnfp->xnf_devinfo, &tx_buffer_dma_attr, 2278 0, 0, &bdesc->dma_handle) != DDI_SUCCESS) 2279 goto failure; 2280 2281 /* Allocate DMA-able memory for transmit buffer */ 2282 if (ddi_dma_mem_alloc(bdesc->dma_handle, 2283 PAGESIZE, &data_accattr, DDI_DMA_STREAMING, 0, 0, 2284 &bdesc->buf, &len, &bdesc->acc_handle) != DDI_SUCCESS) 2285 goto failure_1; 2286 2287 bdesc->xnfp = xnfp; 2288 xnfp->xnf_tx_buffer_count++; 2289 2290 return (bdesc); 2291 2292 failure_1: 2293 ddi_dma_free_handle(&bdesc->dma_handle); 2294 2295 failure: 2296 kmem_free(bdesc, sizeof (*bdesc)); 2297 return (NULL); 2298 } 2299 2300 /* 2301 * Allocate a DMA-able receive buffer, including a structure to 2302 * keep track of the buffer. Called with rx_buf_mutex held.
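 * The free routine registered with the buffer depends on the receive mode: xnf_copy_rcv_complete() for hypervisor copy, xnf_rcv_complete() for page flip. In page-flip mode the machine page backing the new buffer is handed straight back to the hypervisor; see the balloon_free_pages() call at the end of this routine.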
2303 */ 2304 static struct xnf_buffer_desc * 2305 xnf_alloc_buffer(xnf_t *xnfp) 2306 { 2307 struct xnf_buffer_desc *bdesc; 2308 size_t len; 2309 uint_t ncookies; 2310 ddi_dma_cookie_t dma_cookie; 2311 long cnt; 2312 pfn_t pfn; 2313 2314 if (xnfp->xnf_rx_buffer_count >= xnfp->xnf_max_rx_bufs) 2315 return (NULL); 2316 2317 if ((bdesc = kmem_zalloc(sizeof (*bdesc), KM_NOSLEEP)) == NULL) 2318 return (NULL); 2319 2320 /* allocate a DMA access handle for receive buffer */ 2321 if (ddi_dma_alloc_handle(xnfp->xnf_devinfo, &rx_buffer_dma_attr, 2322 0, 0, &bdesc->dma_handle) != DDI_SUCCESS) 2323 goto failure; 2324 2325 /* Allocate DMA-able memory for receive buffer */ 2326 if (ddi_dma_mem_alloc(bdesc->dma_handle, 2327 PAGESIZE, &data_accattr, DDI_DMA_STREAMING, 0, 0, 2328 &bdesc->buf, &len, &bdesc->acc_handle) != DDI_SUCCESS) 2329 goto failure_1; 2330 2331 /* bind to virtual address of buffer to get physical address */ 2332 if (ddi_dma_addr_bind_handle(bdesc->dma_handle, NULL, 2333 bdesc->buf, PAGESIZE, DDI_DMA_READ | DDI_DMA_STREAMING, 2334 DDI_DMA_SLEEP, 0, &dma_cookie, &ncookies) != DDI_DMA_MAPPED) 2335 goto failure_2; 2336 2337 bdesc->buf_phys = dma_cookie.dmac_laddress; 2338 bdesc->xnfp = xnfp; 2339 if (xnfp->xnf_rx_hvcopy) { 2340 bdesc->free_rtn.free_func = xnf_copy_rcv_complete; 2341 } else { 2342 bdesc->free_rtn.free_func = xnf_rcv_complete; 2343 } 2344 bdesc->free_rtn.free_arg = (char *)bdesc; 2345 bdesc->grant_ref = GRANT_INVALID_REF; 2346 ASSERT(ncookies == 1); 2347 2348 xnfp->xnf_rx_buffer_count++; 2349 2350 if (!xnfp->xnf_rx_hvcopy) { 2351 /* 2352 * Unmap the page, and hand the machine page back 2353 * to xen so it can be used as a backend net buffer. 2354 */ 2355 pfn = xnf_btop(bdesc->buf_phys); 2356 cnt = balloon_free_pages(1, NULL, bdesc->buf, &pfn); 2357 if (cnt != 1) { 2358 cmn_err(CE_WARN, "unable to give a page back to the " 2359 "hypervisor\n"); 2360 } 2361 } 2362 2363 return (bdesc); 2364 2365 failure_2: 2366 ddi_dma_mem_free(&bdesc->acc_handle); 2367 2368 failure_1: 2369 ddi_dma_free_handle(&bdesc->dma_handle); 2370 2371 failure: 2372 kmem_free(bdesc, sizeof (*bdesc)); 2373 return (NULL); 2374 } 2375 2376 /* 2377 * Statistics. 2378 */ 2379 static char *xnf_aux_statistics[] = { 2380 "tx_cksum_deferred", 2381 "rx_cksum_no_need", 2382 "interrupts", 2383 "unclaimed_interrupts", 2384 "tx_pullup", 2385 "tx_pagebndry", 2386 "tx_attempt", 2387 "rx_no_ringbuf", 2388 "hvcopy_packet_processed", 2389 }; 2390 2391 static int 2392 xnf_kstat_aux_update(kstat_t *ksp, int flag) 2393 { 2394 xnf_t *xnfp; 2395 kstat_named_t *knp; 2396 2397 if (flag != KSTAT_READ) 2398 return (EACCES); 2399 2400 xnfp = ksp->ks_private; 2401 knp = ksp->ks_data; 2402 2403 /* 2404 * Assignment order must match that of the names in 2405 * xnf_aux_statistics. 
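 * For example, the first name in that array, "tx_cksum_deferred", is paired with xnf_stat_tx_cksum_deferred immediately below, and so on down the list.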
2406 */ 2407 (knp++)->value.ui64 = xnfp->xnf_stat_tx_cksum_deferred; 2408 (knp++)->value.ui64 = xnfp->xnf_stat_rx_cksum_no_need; 2409 2410 (knp++)->value.ui64 = xnfp->xnf_stat_interrupts; 2411 (knp++)->value.ui64 = xnfp->xnf_stat_unclaimed_interrupts; 2412 (knp++)->value.ui64 = xnfp->xnf_stat_tx_pullup; 2413 (knp++)->value.ui64 = xnfp->xnf_stat_tx_pagebndry; 2414 (knp++)->value.ui64 = xnfp->xnf_stat_tx_attempt; 2415 (knp++)->value.ui64 = xnfp->xnf_stat_rx_no_ringbuf; 2416 2417 (knp++)->value.ui64 = xnfp->xnf_stat_hvcopy_packet_processed; 2418 2419 return (0); 2420 } 2421 2422 static boolean_t 2423 xnf_kstat_init(xnf_t *xnfp) 2424 { 2425 int nstat = sizeof (xnf_aux_statistics) / 2426 sizeof (xnf_aux_statistics[0]); 2427 char **cp = xnf_aux_statistics; 2428 kstat_named_t *knp; 2429 2430 /* 2431 * Create and initialise kstats. 2432 */ 2433 if ((xnfp->xnf_kstat_aux = kstat_create("xnf", 2434 ddi_get_instance(xnfp->xnf_devinfo), 2435 "aux_statistics", "net", KSTAT_TYPE_NAMED, 2436 nstat, 0)) == NULL) 2437 return (B_FALSE); 2438 2439 xnfp->xnf_kstat_aux->ks_private = xnfp; 2440 xnfp->xnf_kstat_aux->ks_update = xnf_kstat_aux_update; 2441 2442 knp = xnfp->xnf_kstat_aux->ks_data; 2443 while (nstat > 0) { 2444 kstat_named_init(knp, *cp, KSTAT_DATA_UINT64); 2445 2446 knp++; 2447 cp++; 2448 nstat--; 2449 } 2450 2451 kstat_install(xnfp->xnf_kstat_aux); 2452 2453 return (B_TRUE); 2454 } 2455 2456 static int 2457 xnf_stat(void *arg, uint_t stat, uint64_t *val) 2458 { 2459 xnf_t *xnfp = arg; 2460 2461 mutex_enter(&xnfp->xnf_intrlock); 2462 mutex_enter(&xnfp->xnf_txlock); 2463 2464 #define mac_stat(q, r) \ 2465 case (MAC_STAT_##q): \ 2466 *val = xnfp->xnf_stat_##r; \ 2467 break 2468 2469 #define ether_stat(q, r) \ 2470 case (ETHER_STAT_##q): \ 2471 *val = xnfp->xnf_stat_##r; \ 2472 break 2473 2474 switch (stat) { 2475 2476 mac_stat(IPACKETS, ipackets); 2477 mac_stat(OPACKETS, opackets); 2478 mac_stat(RBYTES, rbytes); 2479 mac_stat(OBYTES, obytes); 2480 mac_stat(NORCVBUF, norxbuf); 2481 mac_stat(IERRORS, errrx); 2482 mac_stat(NOXMTBUF, tx_defer); 2483 2484 ether_stat(MACRCV_ERRORS, mac_rcv_error); 2485 ether_stat(TOOSHORT_ERRORS, runt); 2486 2487 default: 2488 mutex_exit(&xnfp->xnf_txlock); 2489 mutex_exit(&xnfp->xnf_intrlock); 2490 2491 return (ENOTSUP); 2492 } 2493 2494 #undef mac_stat 2495 #undef ether_stat 2496 2497 mutex_exit(&xnfp->xnf_txlock); 2498 mutex_exit(&xnfp->xnf_intrlock); 2499 2500 return (0); 2501 } 2502 2503 /*ARGSUSED*/ 2504 static void 2505 xnf_blank(void *arg, time_t ticks, uint_t count) 2506 { 2507 /* 2508 * XXPV dme: blanking is not currently implemented. 2509 * 2510 * It's not obvious how to use the 'ticks' argument here. 2511 * 2512 * 'Count' might be used as an indicator of how to set 2513 * rsp_event when posting receive buffers to the rx_ring. It 2514 * would replace the code at the tail of xnf_process_recv() 2515 * that simply indicates that the next completed packet should 2516 * cause an interrupt. 
2517 */ 2518 } 2519 2520 static void 2521 xnf_resources(void *arg) 2522 { 2523 xnf_t *xnfp = arg; 2524 mac_rx_fifo_t mrf; 2525 2526 mrf.mrf_type = MAC_RX_FIFO; 2527 mrf.mrf_blank = xnf_blank; 2528 mrf.mrf_arg = (void *)xnfp; 2529 mrf.mrf_normal_blank_time = 128; /* XXPV dme: see xnf_blank() */ 2530 mrf.mrf_normal_pkt_count = 8; /* XXPV dme: see xnf_blank() */ 2531 2532 xnfp->xnf_rx_handle = mac_resource_add(xnfp->xnf_mh, 2533 (mac_resource_t *)&mrf); 2534 } 2535 2536 /*ARGSUSED*/ 2537 static void 2538 xnf_ioctl(void *arg, queue_t *q, mblk_t *mp) 2539 { 2540 miocnak(q, mp, 0, EINVAL); 2541 } 2542 2543 static boolean_t 2544 xnf_getcapab(void *arg, mac_capab_t cap, void *cap_data) 2545 { 2546 xnf_t *xnfp = arg; 2547 2548 switch (cap) { 2549 case MAC_CAPAB_HCKSUM: { 2550 uint32_t *capab = cap_data; 2551 2552 /* 2553 * Whilst the flag used to communicate with the IO 2554 * domain is called "NETTXF_csum_blank", the checksum 2555 * in the packet must contain the pseudo-header 2556 * checksum and not zero. 2557 * 2558 * To help out the IO domain, we might use 2559 * HCKSUM_INET_PARTIAL. Unfortunately our stack will 2560 * then use checksum offload for IPv6 packets, which 2561 * the IO domain can't handle. 2562 * 2563 * As a result, we declare ourselves capable of 2564 * HCKSUM_INET_FULL_V4. This means that we receive 2565 * IPv4 packets from the stack with a blank checksum 2566 * field and must insert the pseudo-header checksum 2567 * before passing the packet to the IO domain. 2568 */ 2569 if (xnfp->xnf_cksum_offload) 2570 *capab = HCKSUM_INET_FULL_V4; 2571 else 2572 *capab = 0; 2573 break; 2574 } 2575 2576 case MAC_CAPAB_POLL: 2577 /* Just return B_TRUE. */ 2578 break; 2579 2580 default: 2581 return (B_FALSE); 2582 } 2583 2584 return (B_TRUE); 2585 } 2586 2587 /*ARGSUSED*/ 2588 static void 2589 oe_state_change(dev_info_t *dip, ddi_eventcookie_t id, 2590 void *arg, void *impl_data) 2591 { 2592 xnf_t *xnfp = ddi_get_driver_private(dip); 2593 XenbusState new_state = *(XenbusState *)impl_data; 2594 2595 ASSERT(xnfp != NULL); 2596 2597 switch (new_state) { 2598 case XenbusStateConnected: 2599 mutex_enter(&xnfp->xnf_intrlock); 2600 mutex_enter(&xnfp->xnf_txlock); 2601 2602 xnfp->xnf_connected = B_TRUE; 2603 /* 2604 * Wake up threads that want to send data to the backend 2605 * but were blocked because it was not yet ready. 2606 */ 2607 cv_broadcast(&xnfp->xnf_cv); 2608 2609 mutex_exit(&xnfp->xnf_txlock); 2610 mutex_exit(&xnfp->xnf_intrlock); 2611 2612 /* 2613 * Kick the backend in case it missed any tx requests 2614 * in the TX ring buffer. 2615 */ 2616 ec_notify_via_evtchn(xnfp->xnf_evtchn); 2617 2618 /* 2619 * The backend may already have queued rx data in the 2620 * RX ring after it connected but before we saw its 2621 * state change here, so call our interrupt handling 2622 * routine to process it, if any. 2623 */ 2624 (void) xnf_intr((caddr_t)xnfp); 2625 2626 break; 2627 2628 default: 2629 break; 2630 } 2631 } 2632 2633 /* 2634 * Check whether the backend is capable of and willing to talk 2635 * to us via hypervisor copy, as opposed to page flip. 2636 */ 2637 static boolean_t 2638 xnf_hvcopy_peer_status(dev_info_t *devinfo) 2639 { 2640 int be_rx_copy; 2641 int err; 2642 2643 err = xenbus_scanf(XBT_NULL, xvdi_get_oename(devinfo), 2644 "feature-rx-copy", "%d", &be_rx_copy); 2645 /* 2646 * If we fail to read the store we assume that the key is 2647 * absent, implying an older domain at the far end. Older 2648 * domains cannot do HV copy (we assume).
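 * A backend that can do copy publishes a non-zero "feature-rx-copy" entry in its xenstore directory (the path returned by xvdi_get_oename()); xenbus_scanf() parses that value into be_rx_copy below.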
2649 */ 2650 if (err != 0) 2651 be_rx_copy = 0; 2652 2653 return (be_rx_copy ? B_TRUE : B_FALSE); 2654 } 2655
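/*
 * Illustrative note, not part of the driver: the auxiliary counters
 * created by xnf_kstat_init() and refreshed by xnf_kstat_aux_update()
 * can be read from userland with kstat(1M), e.g.
 *
 *	kstat -m xnf -i 0 -n aux_statistics
 *
 * or via libkstat. A minimal sketch, assuming instance 0 and the
 * "hvcopy_packet_processed" counter:
 *
 *	kstat_ctl_t *kc = kstat_open();
 *	kstat_t *ksp = kstat_lookup(kc, "xnf", 0, "aux_statistics");
 *	kstat_named_t *kn;
 *
 *	if (ksp != NULL && kstat_read(kc, ksp, NULL) != -1 &&
 *	    (kn = kstat_data_lookup(ksp, "hvcopy_packet_processed"))
 *	    != NULL)
 *		(void) printf("%llu\n", (u_longlong_t)kn->value.ui64);
 *	(void) kstat_close(kc);
 */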