/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 *
 * Copyright (c) 2004 Christian Limpach.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. This section intentionally left blank.
 * 4. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
/*
 * Section 3 of the above license was updated in response to bug 6379571.
 */

/*
 * xnf.c - Nemo-based network driver for domU
 */

#include <sys/types.h>
#include <sys/errno.h>
#include <sys/param.h>
#include <sys/sysmacros.h>
#include <sys/systm.h>
#include <sys/stream.h>
#include <sys/strsubr.h>
#include <sys/conf.h>
#include <sys/ddi.h>
#include <sys/devops.h>
#include <sys/sunddi.h>
#include <sys/sunndi.h>
#include <sys/dlpi.h>
#include <sys/ethernet.h>
#include <sys/strsun.h>
#include <sys/pattr.h>
#include <inet/ip.h>
#include <inet/ip_impl.h>
#include <sys/gld.h>
#include <sys/modctl.h>
#include <sys/mac.h>
#include <sys/mac_ether.h>
#include <sys/bootinfo.h>
#include <sys/mach_mmu.h>
#ifdef XPV_HVM_DRIVER
#include <sys/xpv_support.h>
#include <sys/hypervisor.h>
#else
#include <sys/hypervisor.h>
#include <sys/evtchn_impl.h>
#include <sys/balloon_impl.h>
#endif
#include <xen/public/io/netif.h>
#include <sys/gnttab.h>
#include <xen/sys/xendev.h>
#include <sys/sdt.h>

#include <io/xnf.h>


/*
 * Declarations and Module Linkage
 */

#if defined(DEBUG) || defined(__lint)
#define	XNF_DEBUG
int	xnfdebug = 0;
#endif

/*
 * On a 32 bit PAE system physical and machine addresses are larger
 * than 32 bits.  ddi_btop() on such systems takes an unsigned long
 * argument, and so addresses above 4G are truncated before ddi_btop()
 * gets to see them.  To avoid this, code the shift operation here.
 */
#define	xnf_btop(addr)	((addr) >> PAGESHIFT)

boolean_t	xnf_cksum_offload = B_TRUE;

/* Default value for hypervisor-based copy operations */
boolean_t	xnf_rx_hvcopy = B_TRUE;

/*
 * Should pages used for transmit be readonly for the peer?
 */
boolean_t	xnf_tx_pages_readonly = B_FALSE;
/*
 * Packets under this size are bcopied instead of using desballoc.
 * Choose a value > XNF_FRAMESIZE (1514) to force the receive path to
 * always copy.
 */
unsigned int xnf_rx_bcopy_thresh = 64;

unsigned int xnf_max_tx_frags = 1;

/* Required system entry points */
static int	xnf_attach(dev_info_t *, ddi_attach_cmd_t);
static int	xnf_detach(dev_info_t *, ddi_detach_cmd_t);

/* Required driver entry points for Nemo */
static int	xnf_start(void *);
static void	xnf_stop(void *);
static int	xnf_set_mac_addr(void *, const uint8_t *);
static int	xnf_set_multicast(void *, boolean_t, const uint8_t *);
static int	xnf_set_promiscuous(void *, boolean_t);
static mblk_t	*xnf_send(void *, mblk_t *);
static uint_t	xnf_intr(caddr_t);
static int	xnf_stat(void *, uint_t, uint64_t *);
static void	xnf_blank(void *, time_t, uint_t);
static void	xnf_resources(void *);
static void	xnf_ioctl(void *, queue_t *, mblk_t *);
static boolean_t xnf_getcapab(void *, mac_capab_t, void *);

/* Driver private functions */
static int xnf_alloc_dma_resources(xnf_t *);
static void xnf_release_dma_resources(xnf_t *);
static mblk_t *xnf_process_recv(xnf_t *);
static void xnf_rcv_complete(struct xnf_buffer_desc *);
static void xnf_release_mblks(xnf_t *);
static struct xnf_buffer_desc *xnf_alloc_tx_buffer(xnf_t *);
static struct xnf_buffer_desc *xnf_alloc_buffer(xnf_t *);
static struct xnf_buffer_desc *xnf_get_tx_buffer(xnf_t *);
static struct xnf_buffer_desc *xnf_get_buffer(xnf_t *);
static void xnf_free_buffer(struct xnf_buffer_desc *);
static void xnf_free_tx_buffer(struct xnf_buffer_desc *);
void xnf_send_driver_status(int, int);
static void rx_buffer_hang(xnf_t *, struct xnf_buffer_desc *);
static int xnf_clean_tx_ring(xnf_t *);
static void oe_state_change(dev_info_t *, ddi_eventcookie_t,
    void *, void *);
static mblk_t *xnf_process_hvcopy_recv(xnf_t *xnfp);
static boolean_t xnf_hvcopy_peer_status(dev_info_t *devinfo);
static boolean_t xnf_kstat_init(xnf_t *xnfp);

/*
 * XXPV dme: remove MC_IOCTL?
 */
static mac_callbacks_t xnf_callbacks = {
	MC_RESOURCES | MC_IOCTL | MC_GETCAPAB,
	xnf_stat,
	xnf_start,
	xnf_stop,
	xnf_set_promiscuous,
	xnf_set_multicast,
	xnf_set_mac_addr,
	xnf_send,
	xnf_resources,
	xnf_ioctl,
	xnf_getcapab
};

#define	GRANT_INVALID_REF	0
const int xnf_rx_bufs_lowat = 4 * NET_RX_RING_SIZE;
const int xnf_rx_bufs_hiwat = 8 * NET_RX_RING_SIZE; /* default max */

/* DMA attributes for network ring buffer */
static ddi_dma_attr_t ringbuf_dma_attr = {
	DMA_ATTR_V0,		/* version of this structure */
	0,			/* lowest usable address */
	0xffffffffffffffffULL,	/* highest usable address */
	0x7fffffff,		/* maximum DMAable byte count */
	MMU_PAGESIZE,		/* alignment in bytes */
	0x7ff,			/* bitmap of burst sizes */
	1,			/* minimum transfer */
	0xffffffffU,		/* maximum transfer */
	0xffffffffffffffffULL,	/* maximum segment length */
	1,			/* maximum number of segments */
	1,			/* granularity */
	0,			/* flags (reserved) */
};

/* DMA attributes for transmit data */
static ddi_dma_attr_t tx_buffer_dma_attr = {
	DMA_ATTR_V0,		/* version of this structure */
	0,			/* lowest usable address */
	0xffffffffffffffffULL,	/* highest usable address */
	0x7fffffff,		/* maximum DMAable byte count */
	MMU_PAGESIZE,		/* alignment in bytes */
	0x7ff,			/* bitmap of burst sizes */
	1,			/* minimum transfer */
	0xffffffffU,		/* maximum transfer */
	0xffffffffffffffffULL,	/* maximum segment length */
	1,			/* maximum number of segments */
	1,			/* granularity */
	0,			/* flags (reserved) */
};

/* DMA attributes for a receive buffer */
static ddi_dma_attr_t rx_buffer_dma_attr = {
	DMA_ATTR_V0,		/* version of this structure */
	0,			/* lowest usable address */
	0xffffffffffffffffULL,	/* highest usable address */
	0x7fffffff,		/* maximum DMAable byte count */
	MMU_PAGESIZE,		/* alignment in bytes */
	0x7ff,			/* bitmap of burst sizes */
	1,			/* minimum transfer */
	0xffffffffU,		/* maximum transfer */
	0xffffffffffffffffULL,	/* maximum segment length */
	1,			/* maximum number of segments */
	1,			/* granularity */
	0,			/* flags (reserved) */
};

/* DMA access attributes for registers and descriptors */
static ddi_device_acc_attr_t accattr = {
	DDI_DEVICE_ATTR_V0,
	DDI_STRUCTURE_LE_ACC,	/* This is a little-endian device */
	DDI_STRICTORDER_ACC
};

/*
 * DMA access attributes for data: NOT to be byte swapped.
 */
static ddi_device_acc_attr_t data_accattr = {
	DDI_DEVICE_ATTR_V0,
	DDI_NEVERSWAP_ACC,
	DDI_STRICTORDER_ACC
};

unsigned char xnf_broadcastaddr[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
int xnf_diagnose = 0; /* Patchable global for diagnostic purposes */

DDI_DEFINE_STREAM_OPS(xnf_dev_ops, nulldev, nulldev, xnf_attach, xnf_detach,
    nodev, NULL, D_MP, NULL);

static struct modldrv xnf_modldrv = {
	&mod_driverops,
	"Virtual Ethernet driver",
	&xnf_dev_ops
};

static struct modlinkage modlinkage = {
	MODREV_1, &xnf_modldrv, NULL
};

int
_init(void)
{
	int r;

	mac_init_ops(&xnf_dev_ops, "xnf");
	r = mod_install(&modlinkage);
	if (r != DDI_SUCCESS)
		mac_fini_ops(&xnf_dev_ops);

	return (r);
}

int
_fini(void)
{
	return (EBUSY); /* XXPV dme: should be removable */
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}

static int
xnf_setup_rings(xnf_t *xnfp)
{
	int			ix, err;
	RING_IDX		i;
	struct xnf_buffer_desc	*bdesc, *rbp;
	struct xenbus_device	*xsd;
	domid_t			oeid;

	oeid = xvdi_get_oeid(xnfp->xnf_devinfo);
	xsd = xvdi_get_xsd(xnfp->xnf_devinfo);

	if (xnfp->xnf_tx_ring_ref != GRANT_INVALID_REF)
		gnttab_end_foreign_access(xnfp->xnf_tx_ring_ref, 0, 0);

	err = gnttab_grant_foreign_access(oeid,
	    xnf_btop(pa_to_ma(xnfp->xnf_tx_ring_phys_addr)), 0);
	if (err <= 0) {
		err = -err;
		xenbus_dev_error(xsd, err, "granting access to tx ring page");
		goto out;
	}
	xnfp->xnf_tx_ring_ref = (grant_ref_t)err;

	if (xnfp->xnf_rx_ring_ref != GRANT_INVALID_REF)
		gnttab_end_foreign_access(xnfp->xnf_rx_ring_ref, 0, 0);

	err = gnttab_grant_foreign_access(oeid,
	    xnf_btop(pa_to_ma(xnfp->xnf_rx_ring_phys_addr)), 0);
	if (err <= 0) {
		err = -err;
		xenbus_dev_error(xsd, err, "granting access to rx ring page");
		goto out;
	}
	xnfp->xnf_rx_ring_ref = (grant_ref_t)err;


	mutex_enter(&xnfp->xnf_intrlock);

	/*
	 * Cleanup the TX ring.  We just clean up any valid tx_pktinfo structs
	 * and reset the ring.  Note that this can lose packets after a
	 * resume, but we expect to stagger on.
	 */
	mutex_enter(&xnfp->xnf_txlock);

	for (i = 0; i < xnfp->xnf_n_tx; i++) {
		struct tx_pktinfo *txp = &xnfp->xnf_tx_pkt_info[i];

		txp->id = i + 1;

		if (txp->grant_ref == GRANT_INVALID_REF) {
			ASSERT(txp->mp == NULL);
			ASSERT(txp->bdesc == NULL);
			continue;
		}

		if (gnttab_query_foreign_access(txp->grant_ref) != 0)
			panic("tx grant still in use by backend domain");

		freemsg(txp->mp);
		txp->mp = NULL;

		(void) ddi_dma_unbind_handle(txp->dma_handle);

		if (txp->bdesc != NULL) {
			xnf_free_tx_buffer(txp->bdesc);
			txp->bdesc = NULL;
		}

		(void) gnttab_end_foreign_access_ref(txp->grant_ref,
		    xnfp->xnf_tx_pages_readonly);
		gnttab_release_grant_reference(&xnfp->xnf_gref_tx_head,
		    txp->grant_ref);
		txp->grant_ref = GRANT_INVALID_REF;
	}

	xnfp->xnf_tx_pkt_id_list = 0;
	xnfp->xnf_tx_ring.rsp_cons = 0;
	xnfp->xnf_tx_ring.req_prod_pvt = 0;

	/* LINTED: constant in conditional context */
	SHARED_RING_INIT(xnfp->xnf_tx_ring.sring);

	mutex_exit(&xnfp->xnf_txlock);

	/*
	 * Rebuild the RX ring.
	 * We have to rebuild the RX ring because some of our pages are
	 * currently flipped out/granted so we can't just free the RX
	 * buffers.  Reclaim any unprocessed recv buffers; they won't be
	 * usable anyway since the mfn's they refer to are no longer valid.
	 * Grant the backend domain access to each hung rx buffer.
	 */
	i = xnfp->xnf_rx_ring.rsp_cons;
	while (i++ != xnfp->xnf_rx_ring.sring->req_prod) {
		volatile netif_rx_request_t	*rxrp;

		rxrp = RING_GET_REQUEST(&xnfp->xnf_rx_ring, i);
		ix = rxrp - RING_GET_REQUEST(&xnfp->xnf_rx_ring, 0);
		rbp = xnfp->xnf_rxpkt_bufptr[ix];
		if (rbp != NULL) {
			grant_ref_t	ref = rbp->grant_ref;

			ASSERT(ref != GRANT_INVALID_REF);
			if (xnfp->xnf_rx_hvcopy) {
				pfn_t pfn = xnf_btop(rbp->buf_phys);
				mfn_t mfn = pfn_to_mfn(pfn);

				gnttab_grant_foreign_access_ref(ref, oeid,
				    mfn, 0);
			} else {
				gnttab_grant_foreign_transfer_ref(ref,
				    oeid, 0);
			}
			rxrp->id = ix;
			rxrp->gref = ref;
		}
	}

	/*
	 * Reset the ring pointers to initial state.
	 * Hang buffers for any empty ring slots.
	 */
	xnfp->xnf_rx_ring.rsp_cons = 0;
	xnfp->xnf_rx_ring.req_prod_pvt = 0;

	/* LINTED: constant in conditional context */
	SHARED_RING_INIT(xnfp->xnf_rx_ring.sring);

	for (i = 0; i < NET_RX_RING_SIZE; i++) {
		xnfp->xnf_rx_ring.req_prod_pvt = i;
		if (xnfp->xnf_rxpkt_bufptr[i] != NULL)
			continue;
		if ((bdesc = xnf_get_buffer(xnfp)) == NULL)
			break;
		rx_buffer_hang(xnfp, bdesc);
	}
	xnfp->xnf_rx_ring.req_prod_pvt = i;
	/* LINTED: constant in conditional context */
	RING_PUSH_REQUESTS(&xnfp->xnf_rx_ring);

	mutex_exit(&xnfp->xnf_intrlock);

	return (0);

out:
	if (xnfp->xnf_tx_ring_ref != GRANT_INVALID_REF)
		gnttab_end_foreign_access(xnfp->xnf_tx_ring_ref, 0, 0);
	xnfp->xnf_tx_ring_ref = GRANT_INVALID_REF;

	if (xnfp->xnf_rx_ring_ref != GRANT_INVALID_REF)
		gnttab_end_foreign_access(xnfp->xnf_rx_ring_ref, 0, 0);
	xnfp->xnf_rx_ring_ref = GRANT_INVALID_REF;

	return (err);
}


/* Called when the upper layers free a message we passed upstream */
static void
xnf_copy_rcv_complete(struct xnf_buffer_desc *bdesc)
{
	(void) ddi_dma_unbind_handle(bdesc->dma_handle);
	ddi_dma_mem_free(&bdesc->acc_handle);
	ddi_dma_free_handle(&bdesc->dma_handle);
	kmem_free(bdesc, sizeof (*bdesc));
}


/*
 * Connect driver to back end, called to set up communication with
 * back end driver both initially and on resume after restore/migrate.
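 *
 * The connection is made by granting the peer access to the shared ring
 * pages (xnf_setup_rings()) and then, in a single xenbus transaction,
 * writing the ring references, event channel and feature flags to our
 * xenstore directory before switching the frontend state to Connected.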
 */
void
xnf_be_connect(xnf_t *xnfp)
{
	const char	*message;
	xenbus_transaction_t xbt;
	struct xenbus_device *xsd;
	char		*xsname;
	int		err;

	ASSERT(!xnfp->xnf_connected);

	xsd = xvdi_get_xsd(xnfp->xnf_devinfo);
	xsname = xvdi_get_xsname(xnfp->xnf_devinfo);

	err = xnf_setup_rings(xnfp);
	if (err != 0) {
		cmn_err(CE_WARN, "failed to set up tx/rx rings");
		xenbus_dev_error(xsd, err, "setting up ring");
		return;
	}

again:
	err = xenbus_transaction_start(&xbt);
	if (err != 0) {
		xenbus_dev_error(xsd, EIO, "starting transaction");
		return;
	}

	err = xenbus_printf(xbt, xsname, "tx-ring-ref", "%u",
	    xnfp->xnf_tx_ring_ref);
	if (err != 0) {
		message = "writing tx ring-ref";
		goto abort_transaction;
	}

	err = xenbus_printf(xbt, xsname, "rx-ring-ref", "%u",
	    xnfp->xnf_rx_ring_ref);
	if (err != 0) {
		message = "writing rx ring-ref";
		goto abort_transaction;
	}

	err = xenbus_printf(xbt, xsname, "event-channel", "%u",
	    xnfp->xnf_evtchn);
	if (err != 0) {
		message = "writing event-channel";
		goto abort_transaction;
	}

	err = xenbus_printf(xbt, xsname, "feature-rx-notify", "%d", 1);
	if (err != 0) {
		message = "writing feature-rx-notify";
		goto abort_transaction;
	}

	if (!xnfp->xnf_tx_pages_readonly) {
		err = xenbus_printf(xbt, xsname, "feature-tx-writable",
		    "%d", 1);
		if (err != 0) {
			message = "writing feature-tx-writable";
			goto abort_transaction;
		}
	}

	err = xenbus_printf(xbt, xsname, "feature-no-csum-offload", "%d",
	    xnfp->xnf_cksum_offload ? 0 : 1);
	if (err != 0) {
		message = "writing feature-no-csum-offload";
		goto abort_transaction;
	}
	err = xenbus_printf(xbt, xsname, "request-rx-copy", "%d",
	    xnfp->xnf_rx_hvcopy ? 1 : 0);
	if (err != 0) {
		message = "writing request-rx-copy";
		goto abort_transaction;
	}

	err = xenbus_printf(xbt, xsname, "state", "%d", XenbusStateConnected);
	if (err != 0) {
		message = "writing frontend XenbusStateConnected";
		goto abort_transaction;
	}

	err = xenbus_transaction_end(xbt, 0);
	if (err != 0) {
		if (err == EAGAIN)
			goto again;
		xenbus_dev_error(xsd, err, "completing transaction");
	}

	return;

abort_transaction:
	(void) xenbus_transaction_end(xbt, 1);
	xenbus_dev_error(xsd, err, "%s", message);
}

/*
 * Read config info from xenstore
 */
void
xnf_read_config(xnf_t *xnfp)
{
	char		mac[ETHERADDRL * 3];
	int		err, be_no_cksum_offload;

	err = xenbus_scanf(XBT_NULL, xvdi_get_oename(xnfp->xnf_devinfo), "mac",
	    "%s", (char *)&mac[0]);
	if (err != 0) {
		/*
		 * bad: we're supposed to be set up with a proper mac
		 * address at this point
		 */
		cmn_err(CE_WARN, "%s%d: no mac address",
		    ddi_driver_name(xnfp->xnf_devinfo),
		    ddi_get_instance(xnfp->xnf_devinfo));
		return;
	}
	if (ether_aton(mac, xnfp->xnf_mac_addr) != ETHERADDRL) {
		err = ENOENT;
		xenbus_dev_error(xvdi_get_xsd(xnfp->xnf_devinfo), ENOENT,
		    "parsing %s/mac", xvdi_get_xsname(xnfp->xnf_devinfo));
		return;
	}

	err = xenbus_scanf(XBT_NULL, xvdi_get_oename(xnfp->xnf_devinfo),
	    "feature-no-csum-offload", "%d", &be_no_cksum_offload);
	/*
	 * If we fail to read the store we assume that the key is
	 * absent, implying an older domain at the far end.  Older
	 * domains always support checksum offload.
	 */
	if (err != 0)
		be_no_cksum_offload = 0;
	/*
	 * If the far end cannot do checksum offload or we do not wish
	 * to do it, disable it.
	 */
	if ((be_no_cksum_offload == 1) || !xnfp->xnf_cksum_offload)
		xnfp->xnf_cksum_offload = B_FALSE;
}

/*
 *  attach(9E) -- Attach a device to the system
 *
 *  Called once for each board successfully probed.
 */
static int
xnf_attach(dev_info_t *devinfo, ddi_attach_cmd_t cmd)
{
	mac_register_t *macp;
	xnf_t *xnfp;
	int err;

#ifdef XNF_DEBUG
	if (xnfdebug & XNF_DEBUG_DDI)
		printf("xnf%d: attach(0x%p)\n", ddi_get_instance(devinfo),
		    (void *)devinfo);
#endif

	switch (cmd) {
	case DDI_RESUME:
		xnfp = ddi_get_driver_private(devinfo);

		(void) xvdi_resume(devinfo);
		(void) xvdi_alloc_evtchn(devinfo);
		xnfp->xnf_evtchn = xvdi_get_evtchn(devinfo);
#ifdef XPV_HVM_DRIVER
		ec_bind_evtchn_to_handler(xnfp->xnf_evtchn, IPL_VIF, xnf_intr,
		    xnfp);
#else
		(void) ddi_add_intr(devinfo, 0, NULL, NULL, xnf_intr,
		    (caddr_t)xnfp);
#endif
		xnf_be_connect(xnfp);
		/*
		 * Our MAC address may have changed if we're resuming:
		 * - on a different host
		 * - on the same one and got a different MAC address
		 *   because we didn't specify one of our own.
		 * So it's useful to claim that it changed in order that
		 * IP sends out a gratuitous ARP.
		 */
		mac_unicst_update(xnfp->xnf_mh, xnfp->xnf_mac_addr);
		return (DDI_SUCCESS);

	case DDI_ATTACH:
		break;

	default:
		return (DDI_FAILURE);
	}

	/*
	 * Allocate gld_mac_info_t and xnf_instance structures
	 */
	macp = mac_alloc(MAC_VERSION);
	if (macp == NULL)
		return (DDI_FAILURE);
	xnfp = kmem_zalloc(sizeof (*xnfp), KM_SLEEP);

	macp->m_dip = devinfo;
	macp->m_driver = xnfp;
	xnfp->xnf_devinfo = devinfo;

	macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
	macp->m_src_addr = xnfp->xnf_mac_addr;
	macp->m_callbacks = &xnf_callbacks;
	macp->m_min_sdu = 0;
	macp->m_max_sdu = XNF_MAXPKT;

	xnfp->xnf_running = B_FALSE;
	xnfp->xnf_connected = B_FALSE;
	xnfp->xnf_cksum_offload = xnf_cksum_offload;
	xnfp->xnf_tx_pages_readonly = xnf_tx_pages_readonly;

	xnfp->xnf_rx_hvcopy = xnf_hvcopy_peer_status(devinfo) && xnf_rx_hvcopy;
#ifdef XPV_HVM_DRIVER
	/*
	 * Report our version to dom0.
	 */
	if (xenbus_printf(XBT_NULL, "hvmpv/xnf", "version", "%d",
	    HVMPV_XNF_VERS))
		cmn_err(CE_WARN, "xnf: couldn't write version\n");

	if (!xnfp->xnf_rx_hvcopy) {
		cmn_err(CE_WARN, "The xnf driver requires a dom0 that "
		    "supports 'feature-rx-copy'");
		goto failure;
	}
#endif

	/*
	 * Get the iblock cookie with which to initialize the mutexes.
	 */
	if (ddi_get_iblock_cookie(devinfo, 0, &xnfp->xnf_icookie)
	    != DDI_SUCCESS)
		goto failure;
	/*
	 * Driver locking strategy: the txlock protects all paths
	 * through the driver, except the interrupt thread.
	 * If the interrupt thread needs to do something which could
	 * affect the operation of any other part of the driver,
	 * it needs to acquire the txlock mutex.
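	 *
	 * In addition, the rx_buf_mutex protects the receive buffer free
	 * list and the counts of outstanding/allocated receive buffers,
	 * the tx_buf_mutex protects the transmit buffer free list, and
	 * the intrlock serializes receive ring processing.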
	 */
	mutex_init(&xnfp->xnf_tx_buf_mutex,
	    NULL, MUTEX_DRIVER, xnfp->xnf_icookie);
	mutex_init(&xnfp->xnf_rx_buf_mutex,
	    NULL, MUTEX_DRIVER, xnfp->xnf_icookie);
	mutex_init(&xnfp->xnf_txlock,
	    NULL, MUTEX_DRIVER, xnfp->xnf_icookie);
	mutex_init(&xnfp->xnf_intrlock,
	    NULL, MUTEX_DRIVER, xnfp->xnf_icookie);
	cv_init(&xnfp->xnf_cv, NULL, CV_DEFAULT, NULL);

	xnfp->xnf_gref_tx_head = (grant_ref_t)-1;
	xnfp->xnf_gref_rx_head = (grant_ref_t)-1;
	if (gnttab_alloc_grant_references(NET_TX_RING_SIZE,
	    &xnfp->xnf_gref_tx_head) < 0) {
		cmn_err(CE_WARN, "xnf%d: can't alloc tx grant refs",
		    ddi_get_instance(xnfp->xnf_devinfo));
		goto failure_1;
	}
	if (gnttab_alloc_grant_references(NET_RX_RING_SIZE,
	    &xnfp->xnf_gref_rx_head) < 0) {
		cmn_err(CE_WARN, "xnf%d: can't alloc rx grant refs",
		    ddi_get_instance(xnfp->xnf_devinfo));
		goto failure_1;
	}
	if (xnf_alloc_dma_resources(xnfp) == DDI_FAILURE) {
		cmn_err(CE_WARN, "xnf%d: failed to allocate and initialize "
		    "driver data structures",
		    ddi_get_instance(xnfp->xnf_devinfo));
		goto failure_1;
	}

	xnfp->xnf_rx_ring.sring->rsp_event =
	    xnfp->xnf_tx_ring.sring->rsp_event = 1;

	xnfp->xnf_tx_ring_ref = GRANT_INVALID_REF;
	xnfp->xnf_rx_ring_ref = GRANT_INVALID_REF;

	/* set driver private pointer now */
	ddi_set_driver_private(devinfo, xnfp);

	if (xvdi_add_event_handler(devinfo, XS_OE_STATE, oe_state_change)
	    != DDI_SUCCESS)
		goto failure_1;

	if (!xnf_kstat_init(xnfp))
		goto failure_2;

	/*
	 * Allocate an event channel, add the interrupt handler and
	 * bind it to the event channel.
	 */
	(void) xvdi_alloc_evtchn(devinfo);
	xnfp->xnf_evtchn = xvdi_get_evtchn(devinfo);
#ifdef XPV_HVM_DRIVER
	ec_bind_evtchn_to_handler(xnfp->xnf_evtchn, IPL_VIF, xnf_intr, xnfp);
#else
	(void) ddi_add_intr(devinfo, 0, NULL, NULL, xnf_intr, (caddr_t)xnfp);
#endif

	xnf_read_config(xnfp);
	err = mac_register(macp, &xnfp->xnf_mh);
	mac_free(macp);
	macp = NULL;
	if (err != 0)
		goto failure_3;

#ifdef XPV_HVM_DRIVER
	/*
	 * In the HVM case, this driver essentially replaces a driver for
	 * a 'real' PCI NIC.  Without the "model" property set to
	 * "Ethernet controller", like the PCI code does, netbooting does
	 * not work correctly, as strplumb_get_netdev_path() will not find
	 * this interface.
	 */
	(void) ndi_prop_update_string(DDI_DEV_T_NONE, devinfo, "model",
	    "Ethernet controller");
#endif

	/*
	 * connect to the backend
	 */
	xnf_be_connect(xnfp);

	return (DDI_SUCCESS);

failure_3:
	kstat_delete(xnfp->xnf_kstat_aux);
#ifdef XPV_HVM_DRIVER
	ec_unbind_evtchn(xnfp->xnf_evtchn);
	xvdi_free_evtchn(devinfo);
#else
	ddi_remove_intr(devinfo, 0, xnfp->xnf_icookie);
#endif
	xnfp->xnf_evtchn = INVALID_EVTCHN;

failure_2:
	xvdi_remove_event_handler(devinfo, XS_OE_STATE);

failure_1:
	if (xnfp->xnf_gref_tx_head != (grant_ref_t)-1)
		gnttab_free_grant_references(xnfp->xnf_gref_tx_head);
	if (xnfp->xnf_gref_rx_head != (grant_ref_t)-1)
		gnttab_free_grant_references(xnfp->xnf_gref_rx_head);
	xnf_release_dma_resources(xnfp);
	cv_destroy(&xnfp->xnf_cv);
	mutex_destroy(&xnfp->xnf_rx_buf_mutex);
	mutex_destroy(&xnfp->xnf_txlock);
	mutex_destroy(&xnfp->xnf_intrlock);

failure:
	kmem_free(xnfp, sizeof (*xnfp));
	if (macp != NULL)
		mac_free(macp);

	return (DDI_FAILURE);
}

/*  detach(9E) -- Detach a device from the system  */
static int
xnf_detach(dev_info_t *devinfo, ddi_detach_cmd_t cmd)
{
	xnf_t *xnfp;		/* Our private device info */
	int i;

#ifdef XNF_DEBUG
	if (xnfdebug & XNF_DEBUG_DDI)
		printf("xnf_detach(0x%p)\n", (void *)devinfo);
#endif

	xnfp = ddi_get_driver_private(devinfo);

	switch (cmd) {
	case DDI_SUSPEND:
#ifdef XPV_HVM_DRIVER
		ec_unbind_evtchn(xnfp->xnf_evtchn);
		xvdi_free_evtchn(devinfo);
#else
		ddi_remove_intr(devinfo, 0, xnfp->xnf_icookie);
#endif

		xvdi_suspend(devinfo);

		mutex_enter(&xnfp->xnf_intrlock);
		mutex_enter(&xnfp->xnf_txlock);

		xnfp->xnf_evtchn = INVALID_EVTCHN;
		xnfp->xnf_connected = B_FALSE;
		mutex_exit(&xnfp->xnf_txlock);
		mutex_exit(&xnfp->xnf_intrlock);

		/* claim link to be down after disconnect */
		mac_link_update(xnfp->xnf_mh, LINK_STATE_DOWN);
		return (DDI_SUCCESS);

	case DDI_DETACH:
		break;

	default:
		return (DDI_FAILURE);
	}

	if (xnfp->xnf_connected)
		return (DDI_FAILURE);

	/* Wait for receive buffers to be returned; give up after 5 seconds */
	i = 50;

	mutex_enter(&xnfp->xnf_rx_buf_mutex);
	while (xnfp->xnf_rx_bufs_outstanding > 0) {
		mutex_exit(&xnfp->xnf_rx_buf_mutex);
		delay(drv_usectohz(100000));
		if (--i == 0) {
			cmn_err(CE_WARN,
			    "xnf%d: never reclaimed all the "
			    "receive buffers.  Still have %d "
			    "buffers outstanding.",
			    ddi_get_instance(xnfp->xnf_devinfo),
			    xnfp->xnf_rx_bufs_outstanding);
			return (DDI_FAILURE);
		}
		mutex_enter(&xnfp->xnf_rx_buf_mutex);
	}
	mutex_exit(&xnfp->xnf_rx_buf_mutex);

	if (mac_unregister(xnfp->xnf_mh) != 0)
		return (DDI_FAILURE);

	kstat_delete(xnfp->xnf_kstat_aux);

	/* Stop the receiver */
	xnf_stop(xnfp);

	xvdi_remove_event_handler(devinfo, XS_OE_STATE);

	/* Remove the interrupt */
#ifdef XPV_HVM_DRIVER
	ec_unbind_evtchn(xnfp->xnf_evtchn);
	xvdi_free_evtchn(devinfo);
#else
	ddi_remove_intr(devinfo, 0, xnfp->xnf_icookie);
#endif

	/* Release any pending xmit mblks */
	xnf_release_mblks(xnfp);

	/* Release all DMA resources */
	xnf_release_dma_resources(xnfp);

	cv_destroy(&xnfp->xnf_cv);
	mutex_destroy(&xnfp->xnf_rx_buf_mutex);
	mutex_destroy(&xnfp->xnf_txlock);
	mutex_destroy(&xnfp->xnf_intrlock);

	kmem_free(xnfp, sizeof (*xnfp));

	return (DDI_SUCCESS);
}

/*
 *  xnf_set_mac_addr() -- set the physical network address on the board.
 */
/*ARGSUSED*/
static int
xnf_set_mac_addr(void *arg, const uint8_t *macaddr)
{
	xnf_t *xnfp = arg;

#ifdef XNF_DEBUG
	if (xnfdebug & XNF_DEBUG_TRACE)
		printf("xnf%d: set_mac_addr(0x%p): "
		    "%02x:%02x:%02x:%02x:%02x:%02x\n",
		    ddi_get_instance(xnfp->xnf_devinfo),
		    (void *)xnfp, macaddr[0], macaddr[1], macaddr[2],
		    macaddr[3], macaddr[4], macaddr[5]);
#endif
	/*
	 * We can't set our macaddr.
	 *
	 * XXPV dme: Why not?
	 */
	return (ENOTSUP);
}

/*
 *  xnf_set_multicast() -- set (enable) or disable a multicast address.
 *
 *  Program the hardware to enable/disable the multicast address
 *  in "mcast".  Enable if "add" is true, disable if false.
 */
/*ARGSUSED*/
static int
xnf_set_multicast(void *arg, boolean_t add, const uint8_t *mca)
{
	xnf_t *xnfp = arg;

#ifdef XNF_DEBUG
	if (xnfdebug & XNF_DEBUG_TRACE)
		printf("xnf%d set_multicast(0x%p): "
		    "%02x:%02x:%02x:%02x:%02x:%02x\n",
		    ddi_get_instance(xnfp->xnf_devinfo),
		    (void *)xnfp, mca[0], mca[1], mca[2],
		    mca[3], mca[4], mca[5]);
#endif

	/*
	 * XXPV dme: Ideally we'd relay the address to the backend for
	 * enabling.  The protocol doesn't support that (interesting
	 * extension), so we simply succeed and hope that the relevant
	 * packets are going to arrive.
	 *
	 * If protocol support is added for enable/disable then we'll
	 * need to keep a list of those in use and re-add on resume.
	 */
	return (0);
}

/*
 * xnf_set_promiscuous() -- set or reset promiscuous mode on the board
 *
 *  Program the hardware to enable/disable promiscuous mode.
 */
/*ARGSUSED*/
static int
xnf_set_promiscuous(void *arg, boolean_t on)
{
	xnf_t *xnfp = arg;

#ifdef XNF_DEBUG
	if (xnfdebug & XNF_DEBUG_TRACE)
		printf("xnf%d set_promiscuous(0x%p, %x)\n",
		    ddi_get_instance(xnfp->xnf_devinfo),
		    (void *)xnfp, on);
#endif
	/*
	 * We can't really do this, but we pretend that we can in
	 * order that snoop will work.
	 */
	return (0);
}

/*
 * Clean buffers that we have responses for from the transmit ring.
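 * Returns the number of free request slots remaining in the transmit
 * ring after the cleanup.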
 */
static int
xnf_clean_tx_ring(xnf_t *xnfp)
{
	RING_IDX		next_resp, i;
	struct tx_pktinfo	*reap;
	int			id;
	grant_ref_t		ref;
	boolean_t		work_to_do;

	ASSERT(MUTEX_HELD(&xnfp->xnf_txlock));

loop:
	while (RING_HAS_UNCONSUMED_RESPONSES(&xnfp->xnf_tx_ring)) {
		/*
		 * index of next transmission ack
		 */
		next_resp = xnfp->xnf_tx_ring.sring->rsp_prod;
		membar_consumer();
		/*
		 * Clean tx packets from ring that we have responses for
		 */
		for (i = xnfp->xnf_tx_ring.rsp_cons; i != next_resp; i++) {
			id = RING_GET_RESPONSE(&xnfp->xnf_tx_ring, i)->id;
			reap = &xnfp->xnf_tx_pkt_info[id];
			ref = reap->grant_ref;
			/*
			 * Return id to free list
			 */
			reap->id = xnfp->xnf_tx_pkt_id_list;
			xnfp->xnf_tx_pkt_id_list = id;
			if (gnttab_query_foreign_access(ref) != 0)
				panic("tx grant still in use "
				    "by backend domain");
			(void) ddi_dma_unbind_handle(reap->dma_handle);
			(void) gnttab_end_foreign_access_ref(ref,
			    xnfp->xnf_tx_pages_readonly);
			gnttab_release_grant_reference(&xnfp->xnf_gref_tx_head,
			    ref);
			freemsg(reap->mp);
			reap->mp = NULL;
			reap->grant_ref = GRANT_INVALID_REF;
			if (reap->bdesc != NULL)
				xnf_free_tx_buffer(reap->bdesc);
			reap->bdesc = NULL;
		}
		xnfp->xnf_tx_ring.rsp_cons = next_resp;
		membar_enter();
	}

	/* LINTED: constant in conditional context */
	RING_FINAL_CHECK_FOR_RESPONSES(&xnfp->xnf_tx_ring, work_to_do);
	if (work_to_do)
		goto loop;

	return (RING_FREE_REQUESTS(&xnfp->xnf_tx_ring));
}

/*
 * If we need to pull up data from either a packet that crosses a page
 * boundary or one consisting of multiple mblks, do it here.  We allocate
 * a page-aligned buffer and copy the data into it.  The buffer descriptor
 * for the allocated buffer (which is also allocated here) is returned.
 */
static struct xnf_buffer_desc *
xnf_pullupmsg(xnf_t *xnfp, mblk_t *mp)
{
	struct xnf_buffer_desc	*bdesc;
	mblk_t			*mptr;
	caddr_t			bp;
	int			len;

	/*
	 * get a xmit buffer from the xmit buffer pool
	 */
	mutex_enter(&xnfp->xnf_rx_buf_mutex);
	bdesc = xnf_get_tx_buffer(xnfp);
	mutex_exit(&xnfp->xnf_rx_buf_mutex);
	if (bdesc == NULL)
		return (bdesc);
	/*
	 * Copy the data into the buffer
	 */
	xnfp->xnf_stat_tx_pullup++;
	bp = bdesc->buf;
	for (mptr = mp; mptr != NULL; mptr = mptr->b_cont) {
		len = mptr->b_wptr - mptr->b_rptr;
		bcopy(mptr->b_rptr, bp, len);
		bp += len;
	}
	return (bdesc);
}

void
xnf_pseudo_cksum(caddr_t buf, int length)
{
	struct ether_header *ehp;
	uint16_t sap, len, *stuff;
	uint32_t cksum;
	size_t offset;
	ipha_t *ipha;
	ipaddr_t src, dst;

	ASSERT(length >= sizeof (*ehp));
	ehp = (struct ether_header *)buf;

	if (ntohs(ehp->ether_type) == VLAN_TPID) {
		struct ether_vlan_header *evhp;

		ASSERT(length >= sizeof (*evhp));
		evhp = (struct ether_vlan_header *)buf;
		sap = ntohs(evhp->ether_type);
		offset = sizeof (*evhp);
	} else {
		sap = ntohs(ehp->ether_type);
		offset = sizeof (*ehp);
	}

	ASSERT(sap == ETHERTYPE_IP);

	/*
	 * Packet should have been pulled up by the caller.
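	 *
	 * The TCP/UDP checksum field is seeded below with the pseudo-header
	 * sum (source address, destination address, protocol and length) so
	 * that whoever later completes the checksum only has to fold in the
	 * remaining header and payload bytes.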
	 */
	if ((offset + sizeof (ipha_t)) > length) {
		cmn_err(CE_WARN, "xnf_pseudo_cksum: no room for checksum");
		return;
	}

	ipha = (ipha_t *)(buf + offset);

	ASSERT(IPH_HDR_LENGTH(ipha) == IP_SIMPLE_HDR_LENGTH);

	len = ntohs(ipha->ipha_length) - IP_SIMPLE_HDR_LENGTH;

	switch (ipha->ipha_protocol) {
	case IPPROTO_TCP:
		stuff = IPH_TCPH_CHECKSUMP(ipha, IP_SIMPLE_HDR_LENGTH);
		cksum = IP_TCP_CSUM_COMP;
		break;
	case IPPROTO_UDP:
		stuff = IPH_UDPH_CHECKSUMP(ipha, IP_SIMPLE_HDR_LENGTH);
		cksum = IP_UDP_CSUM_COMP;
		break;
	default:
		cmn_err(CE_WARN, "xnf_pseudo_cksum: unexpected protocol %d",
		    ipha->ipha_protocol);
		return;
	}

	src = ipha->ipha_src;
	dst = ipha->ipha_dst;

	cksum += (dst >> 16) + (dst & 0xFFFF);
	cksum += (src >> 16) + (src & 0xFFFF);
	cksum += htons(len);

	cksum = (cksum >> 16) + (cksum & 0xFFFF);
	cksum = (cksum >> 16) + (cksum & 0xFFFF);

	ASSERT(cksum <= 0xFFFF);

	*stuff = (uint16_t)(cksum ? cksum : ~cksum);
}

/*
 *  xnf_send_one() -- send a packet
 *
 *  Called when a packet is ready to be transmitted. A pointer to an
 *  M_DATA message that contains the packet is passed to this routine.
 *  At least the complete LLC header is contained in the message's
 *  first message block, and the remainder of the packet is contained
 *  within additional M_DATA message blocks linked to the first
 *  message block.
 *
 */
static boolean_t
xnf_send_one(xnf_t *xnfp, mblk_t *mp)
{
	struct xnf_buffer_desc	*xmitbuf;
	struct tx_pktinfo	*txp_info;
	mblk_t			*mptr;
	ddi_dma_cookie_t	dma_cookie;
	RING_IDX		slot;
	int			length = 0, i, pktlen = 0, rc, tx_id;
	int			tx_ring_freespace, page_oops;
	uint_t			ncookies;
	volatile netif_tx_request_t	*txrp;
	caddr_t			bufaddr;
	grant_ref_t		ref;
	unsigned long		mfn;
	uint32_t		pflags;
	domid_t			oeid;

#ifdef XNF_DEBUG
	if (xnfdebug & XNF_DEBUG_SEND)
		printf("xnf%d send(0x%p, 0x%p)\n",
		    ddi_get_instance(xnfp->xnf_devinfo),
		    (void *)xnfp, (void *)mp);
#endif

	ASSERT(mp != NULL);
	ASSERT(mp->b_next == NULL);
	ASSERT(MUTEX_HELD(&xnfp->xnf_txlock));

	tx_ring_freespace = xnf_clean_tx_ring(xnfp);
	ASSERT(tx_ring_freespace >= 0);

	oeid = xvdi_get_oeid(xnfp->xnf_devinfo);
	xnfp->xnf_stat_tx_attempt++;
	/*
	 * If there are no xmit ring slots available, return.
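	 * The unsent mblk is handed back to the MAC layer by xnf_send();
	 * transmission is retried after xnf_intr() notices free slots and
	 * calls mac_tx_update().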
	 */
	if (tx_ring_freespace == 0) {
		xnfp->xnf_stat_tx_defer++;
		return (B_FALSE);	/* Send should be retried */
	}

	slot = xnfp->xnf_tx_ring.req_prod_pvt;
	/* Count the number of mblks in message and compute packet size */
	for (i = 0, mptr = mp; mptr != NULL; mptr = mptr->b_cont, i++)
		pktlen += (mptr->b_wptr - mptr->b_rptr);

	/* Make sure packet isn't too large */
	if (pktlen > XNF_FRAMESIZE) {
		cmn_err(CE_WARN, "xnf%d: oversized packet (%d bytes) dropped",
		    ddi_get_instance(xnfp->xnf_devinfo), pktlen);
		freemsg(mp);
		return (B_TRUE);
	}

	/*
	 * Test if we cross a page boundary with our buffer
	 */
	page_oops = (i == 1) &&
	    (xnf_btop((size_t)mp->b_rptr) !=
	    xnf_btop((size_t)(mp->b_rptr + pktlen)));
	/*
	 * XXPV - unfortunately, the Xen virtual net device currently
	 * doesn't support multiple packet frags, so this will always
	 * end up doing the pullup if we got more than one mblk.
	 */
	if (i > xnf_max_tx_frags || page_oops) {
		if (page_oops)
			xnfp->xnf_stat_tx_pagebndry++;
		if ((xmitbuf = xnf_pullupmsg(xnfp, mp)) == NULL) {
			/* could not allocate resources? */
#ifdef XNF_DEBUG
			cmn_err(CE_WARN, "xnf%d: pullupmsg failed",
			    ddi_get_instance(xnfp->xnf_devinfo));
#endif
			xnfp->xnf_stat_tx_defer++;
			return (B_FALSE);	/* Retry send */
		}
		bufaddr = xmitbuf->buf;
	} else {
		xmitbuf = NULL;
		bufaddr = (caddr_t)mp->b_rptr;
	}

	/* set up data descriptor */
	length = pktlen;

	/*
	 * Get packet id from free list
	 */
	tx_id = xnfp->xnf_tx_pkt_id_list;
	ASSERT(tx_id < NET_TX_RING_SIZE);
	txp_info = &xnfp->xnf_tx_pkt_info[tx_id];
	xnfp->xnf_tx_pkt_id_list = txp_info->id;
	txp_info->id = tx_id;

	/* Prepare for DMA mapping of tx buffer(s) */
	rc = ddi_dma_addr_bind_handle(txp_info->dma_handle,
	    NULL, bufaddr, length, DDI_DMA_WRITE | DDI_DMA_STREAMING,
	    DDI_DMA_DONTWAIT, 0, &dma_cookie, &ncookies);
	if (rc != DDI_DMA_MAPPED) {
		ASSERT(rc != DDI_DMA_INUSE);
		ASSERT(rc != DDI_DMA_PARTIAL_MAP);
		/*
		 * Return id to free list
		 */
		txp_info->id = xnfp->xnf_tx_pkt_id_list;
		xnfp->xnf_tx_pkt_id_list = tx_id;
		if (rc == DDI_DMA_NORESOURCES) {
			xnfp->xnf_stat_tx_defer++;
			return (B_FALSE); /* Retry later */
		}
#ifdef XNF_DEBUG
		cmn_err(CE_WARN, "xnf%d: bind_handle failed (%x)",
		    ddi_get_instance(xnfp->xnf_devinfo), rc);
#endif
		return (B_FALSE);
	}

	ASSERT(ncookies == 1);
	ref = gnttab_claim_grant_reference(&xnfp->xnf_gref_tx_head);
	ASSERT((signed short)ref >= 0);
	mfn = xnf_btop(pa_to_ma((paddr_t)dma_cookie.dmac_laddress));
	gnttab_grant_foreign_access_ref(ref, oeid, mfn,
	    xnfp->xnf_tx_pages_readonly);
	txp_info->grant_ref = ref;
	txrp = RING_GET_REQUEST(&xnfp->xnf_tx_ring, slot);
	txrp->gref = ref;
	txrp->size = dma_cookie.dmac_size;
	txrp->offset = (uintptr_t)bufaddr & PAGEOFFSET;
	txrp->id = tx_id;
	txrp->flags = 0;
	hcksum_retrieve(mp, NULL, NULL, NULL, NULL, NULL, NULL, &pflags);
	if (pflags != 0) {
		ASSERT(xnfp->xnf_cksum_offload);
		/*
		 * If the local protocol stack requests checksum
		 * offload we set the 'checksum blank' flag,
		 * indicating to the peer that we need the checksum
		 * calculated for us.
		 *
		 * We _don't_ set the validated flag, because we haven't
		 * validated that the data and the checksum match.
		 */
		xnf_pseudo_cksum(bufaddr, length);
		txrp->flags |= NETTXF_csum_blank;
		xnfp->xnf_stat_tx_cksum_deferred++;
	}
	membar_producer();
	xnfp->xnf_tx_ring.req_prod_pvt = slot + 1;

	txp_info->mp = mp;
	txp_info->bdesc = xmitbuf;

	xnfp->xnf_stat_opackets++;
	xnfp->xnf_stat_obytes += pktlen;

	return (B_TRUE);	/* successful transmit attempt */
}

mblk_t *
xnf_send(void *arg, mblk_t *mp)
{
	xnf_t *xnfp = arg;
	mblk_t *next;
	boolean_t sent_something = B_FALSE;

	mutex_enter(&xnfp->xnf_txlock);

	/*
	 * Transmission attempts should be impossible without having
	 * previously called xnf_start().
	 */
	ASSERT(xnfp->xnf_running);

	/*
	 * Wait until we are connected to the backend.
	 */
	while (!xnfp->xnf_connected) {
		cv_wait(&xnfp->xnf_cv, &xnfp->xnf_txlock);
	}

	while (mp != NULL) {
		next = mp->b_next;
		mp->b_next = NULL;

		if (!xnf_send_one(xnfp, mp)) {
			mp->b_next = next;
			break;
		}

		mp = next;
		sent_something = B_TRUE;
	}

	if (sent_something) {
		boolean_t notify;

		/* LINTED: constant in conditional context */
		RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&xnfp->xnf_tx_ring,
		    notify);
		if (notify)
			ec_notify_via_evtchn(xnfp->xnf_evtchn);
	}

	mutex_exit(&xnfp->xnf_txlock);

	return (mp);
}

/*
 *  xnf_intr() -- ring interrupt service routine
 */
static uint_t
xnf_intr(caddr_t arg)
{
	xnf_t *xnfp = (xnf_t *)arg;
	int tx_ring_space;

	mutex_enter(&xnfp->xnf_intrlock);

	/* spurious intr */
	if (!xnfp->xnf_connected) {
		mutex_exit(&xnfp->xnf_intrlock);
		xnfp->xnf_stat_unclaimed_interrupts++;
		return (DDI_INTR_UNCLAIMED);
	}

#ifdef XNF_DEBUG
	if (xnfdebug & XNF_DEBUG_INT)
		printf("xnf%d intr(0x%p)\n",
		    ddi_get_instance(xnfp->xnf_devinfo), (void *)xnfp);
#endif
	if (RING_HAS_UNCONSUMED_RESPONSES(&xnfp->xnf_rx_ring)) {
		mblk_t *mp;

		if (xnfp->xnf_rx_hvcopy)
			mp = xnf_process_hvcopy_recv(xnfp);
		else
			mp = xnf_process_recv(xnfp);

		if (mp != NULL)
			mac_rx(xnfp->xnf_mh, xnfp->xnf_rx_handle, mp);
	}

	/*
	 * Clean tx ring and try to start any blocked xmit streams if
	 * there is now some space.
	 */
	mutex_enter(&xnfp->xnf_txlock);
	tx_ring_space = xnf_clean_tx_ring(xnfp);
	mutex_exit(&xnfp->xnf_txlock);
	if (tx_ring_space > XNF_TX_FREE_THRESH) {
		mutex_exit(&xnfp->xnf_intrlock);
		mac_tx_update(xnfp->xnf_mh);
		mutex_enter(&xnfp->xnf_intrlock);
	}

	xnfp->xnf_stat_interrupts++;
	mutex_exit(&xnfp->xnf_intrlock);
	return (DDI_INTR_CLAIMED); /* indicate that the interrupt was for us */
}

/*
 *  xnf_start() -- start the board receiving and enable interrupts.
 */
static int
xnf_start(void *arg)
{
	xnf_t *xnfp = arg;

#ifdef XNF_DEBUG
	if (xnfdebug & XNF_DEBUG_TRACE)
		printf("xnf%d start(0x%p)\n",
		    ddi_get_instance(xnfp->xnf_devinfo), (void *)xnfp);
#endif

	mutex_enter(&xnfp->xnf_intrlock);
	mutex_enter(&xnfp->xnf_txlock);

	/*
	 * Accept packets from above.
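	 * While xnf_running is B_FALSE the receive path drops incoming
	 * packets (re-hanging their buffers) and xnf_send() asserts that
	 * no transmit is attempted.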
	 */
	xnfp->xnf_running = B_TRUE;

	mutex_exit(&xnfp->xnf_txlock);
	mutex_exit(&xnfp->xnf_intrlock);

	return (0);
}

/* xnf_stop() - disable hardware */
static void
xnf_stop(void *arg)
{
	xnf_t *xnfp = arg;

#ifdef XNF_DEBUG
	if (xnfdebug & XNF_DEBUG_TRACE)
		printf("xnf%d stop(0x%p)\n",
		    ddi_get_instance(xnfp->xnf_devinfo), (void *)xnfp);
#endif

	mutex_enter(&xnfp->xnf_intrlock);
	mutex_enter(&xnfp->xnf_txlock);

	xnfp->xnf_running = B_FALSE;

	mutex_exit(&xnfp->xnf_txlock);
	mutex_exit(&xnfp->xnf_intrlock);
}

/*
 * Driver private functions follow
 */

/*
 * Hang buffer on rx ring
 */
static void
rx_buffer_hang(xnf_t *xnfp, struct xnf_buffer_desc *bdesc)
{
	volatile netif_rx_request_t	*reqp;
	RING_IDX			hang_ix;
	grant_ref_t			ref;
	domid_t				oeid;

	oeid = xvdi_get_oeid(xnfp->xnf_devinfo);

	ASSERT(MUTEX_HELD(&xnfp->xnf_intrlock));
	reqp = RING_GET_REQUEST(&xnfp->xnf_rx_ring,
	    xnfp->xnf_rx_ring.req_prod_pvt);
	hang_ix = (RING_IDX) (reqp - RING_GET_REQUEST(&xnfp->xnf_rx_ring, 0));
	ASSERT(xnfp->xnf_rxpkt_bufptr[hang_ix] == NULL);
	if (bdesc->grant_ref == GRANT_INVALID_REF) {
		ref = gnttab_claim_grant_reference(&xnfp->xnf_gref_rx_head);
		ASSERT((signed short)ref >= 0);
		bdesc->grant_ref = ref;
		if (xnfp->xnf_rx_hvcopy) {
			pfn_t pfn = xnf_btop(bdesc->buf_phys);
			mfn_t mfn = pfn_to_mfn(pfn);

			gnttab_grant_foreign_access_ref(ref, oeid, mfn, 0);
		} else {
			gnttab_grant_foreign_transfer_ref(ref, oeid, 0);
		}
	}
	reqp->id = hang_ix;
	reqp->gref = bdesc->grant_ref;
	bdesc->id = hang_ix;
	xnfp->xnf_rxpkt_bufptr[hang_ix] = bdesc;
	membar_producer();
	xnfp->xnf_rx_ring.req_prod_pvt++;
}

static mblk_t *
xnf_process_hvcopy_recv(xnf_t *xnfp)
{
	netif_rx_response_t	*rxpkt;
	mblk_t			*mp, *head, *tail;
	struct xnf_buffer_desc	*bdesc;
	boolean_t		hwcsum = B_FALSE, notify, work_to_do;
	size_t			len;

	/*
	 * in loop over unconsumed responses, we do:
	 * 1. get a response
	 * 2. take corresponding buffer off recv. ring
	 * 3. indicate this by setting slot to NULL
	 * 4. create a new message and
	 * 5. copy data in, adjust ptr
	 *
	 * outside loop:
	 * 6. make sure no more data has arrived; kick HV
	 */

	head = tail = NULL;

loop:
	while (RING_HAS_UNCONSUMED_RESPONSES(&xnfp->xnf_rx_ring)) {

		/* 1. */
		rxpkt = RING_GET_RESPONSE(&xnfp->xnf_rx_ring,
		    xnfp->xnf_rx_ring.rsp_cons);

		DTRACE_PROBE4(got_PKT, int, (int)rxpkt->id, int,
		    (int)rxpkt->offset,
		    int, (int)rxpkt->flags, int, (int)rxpkt->status);

		/*
		 * 2.
		 * Take buffer off of receive ring
		 */
		hwcsum = B_FALSE;
		bdesc = xnfp->xnf_rxpkt_bufptr[rxpkt->id];
		/* 3 */
		xnfp->xnf_rxpkt_bufptr[rxpkt->id] = NULL;
		ASSERT(bdesc->id == rxpkt->id);
		mp = NULL;
		if (!xnfp->xnf_running) {
			DTRACE_PROBE4(pkt_dropped, int, rxpkt->status,
			    char *, bdesc->buf, int, rxpkt->offset,
			    char *, ((char *)bdesc->buf) + rxpkt->offset);
			xnfp->xnf_stat_drop++;
			/*
			 * re-hang the buffer
			 */
			rx_buffer_hang(xnfp, bdesc);
		} else if (rxpkt->status <= 0) {
			DTRACE_PROBE4(pkt_status_negative, int, rxpkt->status,
			    char *, bdesc->buf, int, rxpkt->offset,
			    char *, ((char *)bdesc->buf) + rxpkt->offset);
			xnfp->xnf_stat_errrx++;
			if (rxpkt->status == 0)
				xnfp->xnf_stat_runt++;
			if (rxpkt->status == NETIF_RSP_ERROR)
				xnfp->xnf_stat_mac_rcv_error++;
			if (rxpkt->status == NETIF_RSP_DROPPED)
				xnfp->xnf_stat_norxbuf++;
			/*
			 * re-hang the buffer
			 */
			rx_buffer_hang(xnfp, bdesc);
		} else {
			grant_ref_t		ref = bdesc->grant_ref;
			struct xnf_buffer_desc	*new_bdesc;
			unsigned long		off = rxpkt->offset;

			DTRACE_PROBE4(pkt_status_ok, int, rxpkt->status,
			    char *, bdesc->buf, int, rxpkt->offset,
			    char *, ((char *)bdesc->buf) + rxpkt->offset);
			len = rxpkt->status;
			ASSERT(off + len <= PAGEOFFSET);
			if (ref == GRANT_INVALID_REF) {
				mp = NULL;
				new_bdesc = bdesc;
				cmn_err(CE_WARN, "Bad rx grant reference %d "
				    "from dom %d", ref,
				    xvdi_get_oeid(xnfp->xnf_devinfo));
				goto luckless;
			}
			/*
			 * Release ref which we'll be re-claiming in
			 * rx_buffer_hang().
			 */
			bdesc->grant_ref = GRANT_INVALID_REF;
			(void) gnttab_end_foreign_access_ref(ref, 0);
			gnttab_release_grant_reference(&xnfp->xnf_gref_rx_head,
			    ref);
			if (rxpkt->flags & NETRXF_data_validated)
				hwcsum = B_TRUE;

			/*
			 * XXPV for the initial implementation of HVcopy,
			 * create a new msg and copy in the data
			 */
			/* 4. */
			if ((mp = allocb(len, BPRI_MED)) == NULL) {
				/*
				 * Couldn't get buffer to copy to,
				 * drop this data, and re-hang
				 * the buffer on the ring.
				 */
				xnfp->xnf_stat_norxbuf++;
				DTRACE_PROBE(alloc_nix);
			} else {
				/* 5. */
				DTRACE_PROBE(alloc_ok);
				bcopy(bdesc->buf + off, mp->b_wptr,
				    len);
				mp->b_wptr += len;
			}
			new_bdesc = bdesc;
luckless:

			/* Re-hang old or hang new buffer. */
			rx_buffer_hang(xnfp, new_bdesc);
		}
		if (mp) {
			if (hwcsum) {
				/*
				 * See comments in xnf_process_recv().
				 */

				(void) hcksum_assoc(mp, NULL,
				    NULL, 0, 0, 0, 0,
				    HCK_FULLCKSUM |
				    HCK_FULLCKSUM_OK,
				    0);
				xnfp->xnf_stat_rx_cksum_no_need++;
			}
			if (head == NULL) {
				head = tail = mp;
			} else {
				tail->b_next = mp;
				tail = mp;
			}

			ASSERT(mp->b_next == NULL);

			xnfp->xnf_stat_ipackets++;
			xnfp->xnf_stat_rbytes += len;
		}

		xnfp->xnf_rx_ring.rsp_cons++;

		xnfp->xnf_stat_hvcopy_packet_processed++;
	}

	/* 6. */
	/*
	 * Has more data come in since we started?
	 */
	/* LINTED: constant in conditional context */
	RING_FINAL_CHECK_FOR_RESPONSES(&xnfp->xnf_rx_ring, work_to_do);
	if (work_to_do)
		goto loop;

	/*
	 * Indicate to the backend that we have re-filled the receive
	 * ring.
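	 * Only kick the event channel if the backend has asked to be
	 * notified, as reported by RING_PUSH_REQUESTS_AND_CHECK_NOTIFY().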
	 */
	/* LINTED: constant in conditional context */
	RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&xnfp->xnf_rx_ring, notify);
	if (notify)
		ec_notify_via_evtchn(xnfp->xnf_evtchn);

	return (head);
}

/* Process all queued received packets */
static mblk_t *
xnf_process_recv(xnf_t *xnfp)
{
	volatile netif_rx_response_t *rxpkt;
	mblk_t *mp, *head, *tail;
	struct xnf_buffer_desc *bdesc;
	extern mblk_t *desballoc(unsigned char *, size_t, uint_t, frtn_t *);
	boolean_t hwcsum = B_FALSE, notify, work_to_do;
	size_t len;
	pfn_t pfn;
	long cnt;

	head = tail = NULL;
loop:
	while (RING_HAS_UNCONSUMED_RESPONSES(&xnfp->xnf_rx_ring)) {

		rxpkt = RING_GET_RESPONSE(&xnfp->xnf_rx_ring,
		    xnfp->xnf_rx_ring.rsp_cons);

		/*
		 * Take buffer off of receive ring
		 */
		hwcsum = B_FALSE;
		bdesc = xnfp->xnf_rxpkt_bufptr[rxpkt->id];
		xnfp->xnf_rxpkt_bufptr[rxpkt->id] = NULL;
		ASSERT(bdesc->id == rxpkt->id);
		mp = NULL;
		if (!xnfp->xnf_running) {
			xnfp->xnf_stat_drop++;
			/*
			 * re-hang the buffer
			 */
			rx_buffer_hang(xnfp, bdesc);
		} else if (rxpkt->status <= 0) {
			xnfp->xnf_stat_errrx++;
			if (rxpkt->status == 0)
				xnfp->xnf_stat_runt++;
			if (rxpkt->status == NETIF_RSP_ERROR)
				xnfp->xnf_stat_mac_rcv_error++;
			if (rxpkt->status == NETIF_RSP_DROPPED)
				xnfp->xnf_stat_norxbuf++;
			/*
			 * re-hang the buffer
			 */
			rx_buffer_hang(xnfp, bdesc);
		} else {
			grant_ref_t		ref = bdesc->grant_ref;
			struct xnf_buffer_desc	*new_bdesc;
			unsigned long		off = rxpkt->offset;
			unsigned long		mfn;

			len = rxpkt->status;
			ASSERT(off + len <= PAGEOFFSET);
			if (ref == GRANT_INVALID_REF) {
				mp = NULL;
				new_bdesc = bdesc;
				cmn_err(CE_WARN, "Bad rx grant reference %d "
				    "from dom %d", ref,
				    xvdi_get_oeid(xnfp->xnf_devinfo));
				goto luckless;
			}
			bdesc->grant_ref = GRANT_INVALID_REF;
			mfn = gnttab_end_foreign_transfer_ref(ref);
			ASSERT(mfn != MFN_INVALID);
			ASSERT(hat_getpfnum(kas.a_hat, bdesc->buf) ==
			    PFN_INVALID);

			gnttab_release_grant_reference(&xnfp->xnf_gref_rx_head,
			    ref);
			reassign_pfn(xnf_btop(bdesc->buf_phys), mfn);
			hat_devload(kas.a_hat, bdesc->buf, PAGESIZE,
			    xnf_btop(bdesc->buf_phys),
			    PROT_READ | PROT_WRITE, HAT_LOAD);
			balloon_drv_added(1);

			if (rxpkt->flags & NETRXF_data_validated)
				hwcsum = B_TRUE;
			if (len <= xnf_rx_bcopy_thresh) {
				/*
				 * For small buffers, just copy the data
				 * and send the copy upstream.
				 */
				new_bdesc = NULL;
			} else {
				/*
				 * We send a pointer to this data upstream;
				 * we need a new buffer to replace this one.
				 */
				mutex_enter(&xnfp->xnf_rx_buf_mutex);
				new_bdesc = xnf_get_buffer(xnfp);
				if (new_bdesc != NULL) {
					xnfp->xnf_rx_bufs_outstanding++;
				} else {
					xnfp->xnf_stat_rx_no_ringbuf++;
				}
				mutex_exit(&xnfp->xnf_rx_buf_mutex);
			}

			if (new_bdesc == NULL) {
				/*
				 * Don't have a new ring buffer; bcopy the data
				 * from the buffer, and preserve the
				 * original buffer
				 */
				if ((mp = allocb(len, BPRI_MED)) == NULL) {
					/*
					 * Couldn't get buffer to copy to,
					 * drop this data, and re-hang
					 * the buffer on the ring.
					 */
					xnfp->xnf_stat_norxbuf++;
				} else {
					bcopy(bdesc->buf + off, mp->b_wptr,
					    len);
				}
				/*
				 * Give the buffer page back to xen
				 */
				pfn = xnf_btop(bdesc->buf_phys);
				cnt = balloon_free_pages(1, &mfn, bdesc->buf,
				    &pfn);
				if (cnt != 1) {
					cmn_err(CE_WARN, "unable to give a "
					    "page back to the hypervisor\n");
				}
				new_bdesc = bdesc;
			} else {
				if ((mp = desballoc((unsigned char *)bdesc->buf,
				    off + len, 0, (frtn_t *)bdesc)) == NULL) {
					/*
					 * Couldn't get mblk to pass recv data
					 * up with, free the old ring buffer
					 */
					xnfp->xnf_stat_norxbuf++;
					xnf_rcv_complete(bdesc);
					goto luckless;
				}
				(void) ddi_dma_sync(bdesc->dma_handle,
				    0, 0, DDI_DMA_SYNC_FORCPU);

				mp->b_wptr += off;
				mp->b_rptr += off;
			}
luckless:
			if (mp)
				mp->b_wptr += len;
			/* re-hang old or hang new buffer */
			rx_buffer_hang(xnfp, new_bdesc);
		}
		if (mp) {
			if (hwcsum) {
				/*
				 * If the peer says that the data has
				 * been validated then we declare that
				 * the full checksum has been
				 * verified.
				 *
				 * We don't look at the "checksum
				 * blank" flag, and hence could have a
				 * packet here that we are asserting
				 * is good with a blank checksum.
				 *
				 * The hardware checksum offload
				 * specification says that we must
				 * provide the actual checksum as well
				 * as an assertion that it is valid,
				 * but the protocol stack doesn't
				 * actually use it and some other
				 * drivers don't bother, so we don't.
				 * If it was necessary we could grovel
				 * in the packet to find it.
				 */

				(void) hcksum_assoc(mp, NULL,
				    NULL, 0, 0, 0, 0,
				    HCK_FULLCKSUM |
				    HCK_FULLCKSUM_OK,
				    0);
				xnfp->xnf_stat_rx_cksum_no_need++;
			}
			if (head == NULL) {
				head = tail = mp;
			} else {
				tail->b_next = mp;
				tail = mp;
			}

			ASSERT(mp->b_next == NULL);

			xnfp->xnf_stat_ipackets++;
			xnfp->xnf_stat_rbytes += len;
		}

		xnfp->xnf_rx_ring.rsp_cons++;
	}

	/*
	 * Has more data come in since we started?
	 */
	/* LINTED: constant in conditional context */
	RING_FINAL_CHECK_FOR_RESPONSES(&xnfp->xnf_rx_ring, work_to_do);
	if (work_to_do)
		goto loop;

	/*
	 * Indicate to the backend that we have re-filled the receive
	 * ring.
	 */
	/* LINTED: constant in conditional context */
	RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&xnfp->xnf_rx_ring, notify);
	if (notify)
		ec_notify_via_evtchn(xnfp->xnf_evtchn);

	return (head);
}

/* Called when the upper layers free a message we passed upstream */
static void
xnf_rcv_complete(struct xnf_buffer_desc *bdesc)
{
	xnf_t *xnfp = bdesc->xnfp;
	pfn_t pfn;
	long cnt;

	/* One less outstanding receive buffer */
	mutex_enter(&xnfp->xnf_rx_buf_mutex);
	--xnfp->xnf_rx_bufs_outstanding;
	/*
	 * Return buffer to the free list, unless the free list is getting
	 * too large.  XXPV - this threshold may need tuning.
	 */
	if (xnfp->xnf_rx_descs_free < xnf_rx_bufs_lowat) {
		/*
		 * Unmap the page, and hand the machine page back
		 * to xen so it can be re-used as a backend net buffer.
		 */
		pfn = xnf_btop(bdesc->buf_phys);
		cnt = balloon_free_pages(1, NULL, bdesc->buf, &pfn);
		if (cnt != 1) {
			cmn_err(CE_WARN, "unable to give a page back to the "
			    "hypervisor\n");
		}

		bdesc->next = xnfp->xnf_free_list;
		xnfp->xnf_free_list = bdesc;
		xnfp->xnf_rx_descs_free++;
		mutex_exit(&xnfp->xnf_rx_buf_mutex);
	} else {
		/*
		 * We can return everything here since we have a free buffer
		 * that we have not given the backing page for back to xen.
		 */
		--xnfp->xnf_rx_buffer_count;
		mutex_exit(&xnfp->xnf_rx_buf_mutex);
		(void) ddi_dma_unbind_handle(bdesc->dma_handle);
		ddi_dma_mem_free(&bdesc->acc_handle);
		ddi_dma_free_handle(&bdesc->dma_handle);
		kmem_free(bdesc, sizeof (*bdesc));
	}
}

/*
 *  xnf_alloc_dma_resources() -- initialize the driver's structures
 */
static int
xnf_alloc_dma_resources(xnf_t *xnfp)
{
	dev_info_t		*devinfo = xnfp->xnf_devinfo;
	int			i;
	size_t			len;
	ddi_dma_cookie_t	dma_cookie;
	uint_t			ncookies;
	struct xnf_buffer_desc	*bdesc;
	int			rc;
	caddr_t			rptr;

	xnfp->xnf_n_rx = NET_RX_RING_SIZE;
	xnfp->xnf_max_rx_bufs = xnf_rx_bufs_hiwat;

	xnfp->xnf_n_tx = NET_TX_RING_SIZE;

	/*
	 * The code below allocates all the DMA data structures that
	 * need to be released when the driver is detached.
	 *
	 * First allocate handles for mapping (virtual address) pointers to
	 * transmit data buffers to physical addresses
	 */
	for (i = 0; i < xnfp->xnf_n_tx; i++) {
		if ((rc = ddi_dma_alloc_handle(devinfo,
		    &tx_buffer_dma_attr, DDI_DMA_SLEEP, 0,
		    &xnfp->xnf_tx_pkt_info[i].dma_handle)) != DDI_SUCCESS)
			return (DDI_FAILURE);
	}

	/*
	 * Allocate page for the transmit descriptor ring.
	 */
	if (ddi_dma_alloc_handle(devinfo, &ringbuf_dma_attr,
	    DDI_DMA_SLEEP, 0, &xnfp->xnf_tx_ring_dma_handle) != DDI_SUCCESS)
		goto alloc_error;

	if (ddi_dma_mem_alloc(xnfp->xnf_tx_ring_dma_handle,
	    PAGESIZE, &accattr, DDI_DMA_CONSISTENT,
	    DDI_DMA_SLEEP, 0, &rptr, &len,
	    &xnfp->xnf_tx_ring_dma_acchandle) != DDI_SUCCESS) {
		ddi_dma_free_handle(&xnfp->xnf_tx_ring_dma_handle);
		xnfp->xnf_tx_ring_dma_handle = NULL;
		goto alloc_error;
	}

	if ((rc = ddi_dma_addr_bind_handle(xnfp->xnf_tx_ring_dma_handle, NULL,
	    rptr, PAGESIZE, DDI_DMA_RDWR | DDI_DMA_CONSISTENT,
	    DDI_DMA_SLEEP, 0, &dma_cookie, &ncookies)) != DDI_DMA_MAPPED) {
		ddi_dma_mem_free(&xnfp->xnf_tx_ring_dma_acchandle);
		ddi_dma_free_handle(&xnfp->xnf_tx_ring_dma_handle);
		xnfp->xnf_tx_ring_dma_handle = NULL;
		xnfp->xnf_tx_ring_dma_acchandle = NULL;
		if (rc == DDI_DMA_NORESOURCES)
			goto alloc_error;
		else
			goto error;
	}

	ASSERT(ncookies == 1);
	bzero(rptr, PAGESIZE);
	/* LINTED: constant in conditional context */
	SHARED_RING_INIT((netif_tx_sring_t *)rptr);
	/* LINTED: constant in conditional context */
	FRONT_RING_INIT(&xnfp->xnf_tx_ring, (netif_tx_sring_t *)rptr, PAGESIZE);
	xnfp->xnf_tx_ring_phys_addr = dma_cookie.dmac_laddress;

	/*
	 * Allocate page for the receive descriptor ring.

	/*
	 * Allocate page for the receive descriptor ring.
	 */
	if (ddi_dma_alloc_handle(devinfo, &ringbuf_dma_attr,
	    DDI_DMA_SLEEP, 0, &xnfp->xnf_rx_ring_dma_handle) != DDI_SUCCESS)
		goto alloc_error;

	if (ddi_dma_mem_alloc(xnfp->xnf_rx_ring_dma_handle,
	    PAGESIZE, &accattr, DDI_DMA_CONSISTENT,
	    DDI_DMA_SLEEP, 0, &rptr, &len,
	    &xnfp->xnf_rx_ring_dma_acchandle) != DDI_SUCCESS) {
		ddi_dma_free_handle(&xnfp->xnf_rx_ring_dma_handle);
		xnfp->xnf_rx_ring_dma_handle = NULL;
		goto alloc_error;
	}

	if ((rc = ddi_dma_addr_bind_handle(xnfp->xnf_rx_ring_dma_handle, NULL,
	    rptr, PAGESIZE, DDI_DMA_RDWR | DDI_DMA_CONSISTENT,
	    DDI_DMA_SLEEP, 0, &dma_cookie, &ncookies)) != DDI_DMA_MAPPED) {
		ddi_dma_mem_free(&xnfp->xnf_rx_ring_dma_acchandle);
		ddi_dma_free_handle(&xnfp->xnf_rx_ring_dma_handle);
		xnfp->xnf_rx_ring_dma_handle = NULL;
		xnfp->xnf_rx_ring_dma_acchandle = NULL;
		if (rc == DDI_DMA_NORESOURCES)
			goto alloc_error;
		else
			goto error;
	}

	ASSERT(ncookies == 1);
	bzero(rptr, PAGESIZE);
	/* LINTED: constant in conditional context */
	SHARED_RING_INIT((netif_rx_sring_t *)rptr);
	/* LINTED: constant in conditional context */
	FRONT_RING_INIT(&xnfp->xnf_rx_ring, (netif_rx_sring_t *)rptr, PAGESIZE);
	xnfp->xnf_rx_ring_phys_addr = dma_cookie.dmac_laddress;

	/*
	 * Preallocate receive buffers for each receive descriptor.
	 */

	/* Set up the "free list" of receive buffer descriptors */
	for (i = 0; i < xnfp->xnf_n_rx; i++) {
		if ((bdesc = xnf_alloc_buffer(xnfp)) == NULL)
			goto alloc_error;
		bdesc->next = xnfp->xnf_free_list;
		xnfp->xnf_free_list = bdesc;
	}

	return (DDI_SUCCESS);

alloc_error:
	cmn_err(CE_WARN, "xnf%d: could not allocate enough DMA memory",
	    ddi_get_instance(xnfp->xnf_devinfo));
error:
	xnf_release_dma_resources(xnfp);
	return (DDI_FAILURE);
}
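
/*
 * Note (descriptive, not from the original source): each DMA object above
 * is built in up to three steps -- ddi_dma_alloc_handle(),
 * ddi_dma_mem_alloc(), ddi_dma_addr_bind_handle() -- and the error paths
 * unwind only the steps that completed, in reverse order.  The teardown in
 * xnf_release_dma_resources() below uses the same reverse order:
 *
 *	(void) ddi_dma_unbind_handle(handle);
 *	ddi_dma_mem_free(&acchandle);
 *	ddi_dma_free_handle(&handle);
 */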

/*
 * Release all DMA resources in the opposite order from acquisition.
 * Should not be called until all outstanding esballoc buffers
 * have been returned.
 */
static void
xnf_release_dma_resources(xnf_t *xnfp)
{
	int i;

	/*
	 * Free receive buffers which are currently associated with
	 * descriptors.
	 */
	for (i = 0; i < xnfp->xnf_n_rx; i++) {
		struct xnf_buffer_desc *bp;

		if ((bp = xnfp->xnf_rxpkt_bufptr[i]) == NULL)
			continue;
		xnf_free_buffer(bp);
		xnfp->xnf_rxpkt_bufptr[i] = NULL;
	}

	/* Free the receive ring buffer */
	if (xnfp->xnf_rx_ring_dma_acchandle != NULL) {
		(void) ddi_dma_unbind_handle(xnfp->xnf_rx_ring_dma_handle);
		ddi_dma_mem_free(&xnfp->xnf_rx_ring_dma_acchandle);
		ddi_dma_free_handle(&xnfp->xnf_rx_ring_dma_handle);
		xnfp->xnf_rx_ring_dma_acchandle = NULL;
	}
	/* Free the transmit ring buffer */
	if (xnfp->xnf_tx_ring_dma_acchandle != NULL) {
		(void) ddi_dma_unbind_handle(xnfp->xnf_tx_ring_dma_handle);
		ddi_dma_mem_free(&xnfp->xnf_tx_ring_dma_acchandle);
		ddi_dma_free_handle(&xnfp->xnf_tx_ring_dma_handle);
		xnfp->xnf_tx_ring_dma_acchandle = NULL;
	}

	/*
	 * Free the handles for mapping (virtual address) pointers to
	 * transmit data buffers to physical addresses.
	 */
	for (i = 0; i < xnfp->xnf_n_tx; i++) {
		if (xnfp->xnf_tx_pkt_info[i].dma_handle != NULL) {
			ddi_dma_free_handle(
			    &xnfp->xnf_tx_pkt_info[i].dma_handle);
		}
	}
}

static void
xnf_release_mblks(xnf_t *xnfp)
{
	int i;

	for (i = 0; i < xnfp->xnf_n_tx; i++) {
		if (xnfp->xnf_tx_pkt_info[i].mp == NULL)
			continue;
		freemsg(xnfp->xnf_tx_pkt_info[i].mp);
		xnfp->xnf_tx_pkt_info[i].mp = NULL;
		(void) ddi_dma_unbind_handle(
		    xnfp->xnf_tx_pkt_info[i].dma_handle);
	}
}

/*
 * Remove a transmit buffer descriptor from the head of the free list and
 * return a pointer to it.  If no buffers are on the list, attempt to
 * allocate a new one.  Called with the tx_buf_mutex held.
 */
static struct xnf_buffer_desc *
xnf_get_tx_buffer(xnf_t *xnfp)
{
	struct xnf_buffer_desc *bdesc;

	bdesc = xnfp->xnf_tx_free_list;
	if (bdesc != NULL) {
		xnfp->xnf_tx_free_list = bdesc->next;
	} else {
		bdesc = xnf_alloc_tx_buffer(xnfp);
	}
	return (bdesc);
}

/*
 * Remove a receive buffer descriptor from the head of the free list and
 * return a pointer to it.  If no buffers are on the list, attempt to
 * allocate a new one.  Called with the rx_buf_mutex held.
 */
static struct xnf_buffer_desc *
xnf_get_buffer(xnf_t *xnfp)
{
	struct xnf_buffer_desc *bdesc;

	bdesc = xnfp->xnf_free_list;
	if (bdesc != NULL) {
		xnfp->xnf_free_list = bdesc->next;
		xnfp->xnf_rx_descs_free--;
	} else {
		bdesc = xnf_alloc_buffer(xnfp);
	}
	return (bdesc);
}

/*
 * Return a transmit buffer to the transmit free list.
 */
static void
xnf_free_tx_buffer(struct xnf_buffer_desc *bp)
{
	xnf_t *xnfp = bp->xnfp;

	mutex_enter(&xnfp->xnf_tx_buf_mutex);
	bp->next = xnfp->xnf_tx_free_list;
	xnfp->xnf_tx_free_list = bp;
	mutex_exit(&xnfp->xnf_tx_buf_mutex);
}
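
/*
 * Note (descriptive, not from the original source): both buffer free lists
 * are simple singly-linked LIFO stacks threaded through the 'next' field of
 * struct xnf_buffer_desc.  xnf_get_tx_buffer()/xnf_get_buffer() pop from
 * the head and xnf_free_tx_buffer()/xnf_free_buffer() push back onto it;
 * xnf_tx_buf_mutex and xnf_rx_buf_mutex guard the two lists, with the "get"
 * routines relying on their callers already holding the appropriate mutex.
 */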

/*
 * Put a buffer descriptor onto the head of the free list.
 *
 * For page-flip: we can't really free these buffers back to the kernel
 * since we have given away their backing page to be used by the backend
 * net driver.
 *
 * For hvcopy: release all the memory.
 */
static void
xnf_free_buffer(struct xnf_buffer_desc *bdesc)
{
	xnf_t *xnfp = bdesc->xnfp;

	mutex_enter(&xnfp->xnf_rx_buf_mutex);
	if (xnfp->xnf_rx_hvcopy) {
		if (ddi_dma_unbind_handle(bdesc->dma_handle) != DDI_SUCCESS)
			goto out;
		ddi_dma_mem_free(&bdesc->acc_handle);
		ddi_dma_free_handle(&bdesc->dma_handle);
		kmem_free(bdesc, sizeof (*bdesc));
		xnfp->xnf_rx_buffer_count--;
	} else {
		bdesc->next = xnfp->xnf_free_list;
		xnfp->xnf_free_list = bdesc;
		xnfp->xnf_rx_descs_free++;
	}
out:
	mutex_exit(&xnfp->xnf_rx_buf_mutex);
}

/*
 * Allocate a DMA-able transmit buffer, including a structure to
 * keep track of the buffer.  Called with tx_buf_mutex held.
 */
static struct xnf_buffer_desc *
xnf_alloc_tx_buffer(xnf_t *xnfp)
{
	struct xnf_buffer_desc *bdesc;
	size_t len;

	if ((bdesc = kmem_zalloc(sizeof (*bdesc), KM_NOSLEEP)) == NULL)
		return (NULL);

	/* Allocate a DMA access handle for the transmit buffer */
	if (ddi_dma_alloc_handle(xnfp->xnf_devinfo, &tx_buffer_dma_attr,
	    0, 0, &bdesc->dma_handle) != DDI_SUCCESS)
		goto failure;

	/* Allocate DMA-able memory for the transmit buffer */
	if (ddi_dma_mem_alloc(bdesc->dma_handle,
	    PAGESIZE, &data_accattr, DDI_DMA_STREAMING, 0, 0,
	    &bdesc->buf, &len, &bdesc->acc_handle) != DDI_SUCCESS)
		goto failure_1;

	bdesc->xnfp = xnfp;
	xnfp->xnf_tx_buffer_count++;

	return (bdesc);

failure_1:
	ddi_dma_free_handle(&bdesc->dma_handle);

failure:
	kmem_free(bdesc, sizeof (*bdesc));
	return (NULL);
}
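
/*
 * Note (descriptive, not from the original source): allocation here is
 * non-blocking -- kmem_zalloc(KM_NOSLEEP) and a DDI_DMA_DONTWAIT (0) wait
 * argument to the DDI calls -- since the caller holds xnf_tx_buf_mutex and
 * is prepared to see a NULL return (see xnf_get_tx_buffer() above).
 */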

/*
 * Allocate a DMA-able receive buffer, including a structure to
 * keep track of the buffer.  Called with rx_buf_mutex held.
 */
static struct xnf_buffer_desc *
xnf_alloc_buffer(xnf_t *xnfp)
{
	struct xnf_buffer_desc *bdesc;
	size_t len;
	uint_t ncookies;
	ddi_dma_cookie_t dma_cookie;
	long cnt;
	pfn_t pfn;

	if (xnfp->xnf_rx_buffer_count >= xnfp->xnf_max_rx_bufs)
		return (NULL);

	if ((bdesc = kmem_zalloc(sizeof (*bdesc), KM_NOSLEEP)) == NULL)
		return (NULL);

	/* allocate a DMA access handle for receive buffer */
	if (ddi_dma_alloc_handle(xnfp->xnf_devinfo, &rx_buffer_dma_attr,
	    0, 0, &bdesc->dma_handle) != DDI_SUCCESS)
		goto failure;

	/* Allocate DMA-able memory for receive buffer */
	if (ddi_dma_mem_alloc(bdesc->dma_handle,
	    PAGESIZE, &data_accattr, DDI_DMA_STREAMING, 0, 0,
	    &bdesc->buf, &len, &bdesc->acc_handle) != DDI_SUCCESS)
		goto failure_1;

	/* bind to virtual address of buffer to get physical address */
	if (ddi_dma_addr_bind_handle(bdesc->dma_handle, NULL,
	    bdesc->buf, PAGESIZE, DDI_DMA_READ | DDI_DMA_STREAMING,
	    DDI_DMA_SLEEP, 0, &dma_cookie, &ncookies) != DDI_DMA_MAPPED)
		goto failure_2;

	bdesc->buf_phys = dma_cookie.dmac_laddress;
	bdesc->xnfp = xnfp;
	if (xnfp->xnf_rx_hvcopy) {
		bdesc->free_rtn.free_func = xnf_copy_rcv_complete;
	} else {
		bdesc->free_rtn.free_func = xnf_rcv_complete;
	}
	bdesc->free_rtn.free_arg = (char *)bdesc;
	bdesc->grant_ref = GRANT_INVALID_REF;
	ASSERT(ncookies == 1);

	xnfp->xnf_rx_buffer_count++;

	if (!xnfp->xnf_rx_hvcopy) {
		/*
		 * Unmap the page, and hand the machine page back
		 * to xen so it can be used as a backend net buffer.
		 */
		pfn = xnf_btop(bdesc->buf_phys);
		cnt = balloon_free_pages(1, NULL, bdesc->buf, &pfn);
		if (cnt != 1) {
			cmn_err(CE_WARN, "unable to give a page back to the "
			    "hypervisor\n");
		}
	}

	return (bdesc);

failure_2:
	ddi_dma_mem_free(&bdesc->acc_handle);

failure_1:
	ddi_dma_free_handle(&bdesc->dma_handle);

failure:
	kmem_free(bdesc, sizeof (*bdesc));
	return (NULL);
}
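
/*
 * Note (descriptive, not from the original source): the free_rtn filled in
 * above is the frtn_t that the receive path hands to desballoc() when it
 * loans this buffer upstream.  When the upper layers eventually free the
 * mblk, STREAMS invokes free_func(free_arg) -- i.e. xnf_copy_rcv_complete()
 * or xnf_rcv_complete() with this descriptor -- which is how the buffer
 * finds its way back to the driver.
 */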

/*
 * Statistics.
 */
static char *xnf_aux_statistics[] = {
	"tx_cksum_deferred",
	"rx_cksum_no_need",
	"interrupts",
	"unclaimed_interrupts",
	"tx_pullup",
	"tx_pagebndry",
	"tx_attempt",
	"rx_no_ringbuf",
	"hvcopy_packet_processed",
};

static int
xnf_kstat_aux_update(kstat_t *ksp, int flag)
{
	xnf_t *xnfp;
	kstat_named_t *knp;

	if (flag != KSTAT_READ)
		return (EACCES);

	xnfp = ksp->ks_private;
	knp = ksp->ks_data;

	/*
	 * Assignment order must match that of the names in
	 * xnf_aux_statistics.
	 */
	(knp++)->value.ui64 = xnfp->xnf_stat_tx_cksum_deferred;
	(knp++)->value.ui64 = xnfp->xnf_stat_rx_cksum_no_need;

	(knp++)->value.ui64 = xnfp->xnf_stat_interrupts;
	(knp++)->value.ui64 = xnfp->xnf_stat_unclaimed_interrupts;
	(knp++)->value.ui64 = xnfp->xnf_stat_tx_pullup;
	(knp++)->value.ui64 = xnfp->xnf_stat_tx_pagebndry;
	(knp++)->value.ui64 = xnfp->xnf_stat_tx_attempt;
	(knp++)->value.ui64 = xnfp->xnf_stat_rx_no_ringbuf;

	(knp++)->value.ui64 = xnfp->xnf_stat_hvcopy_packet_processed;

	return (0);
}

static boolean_t
xnf_kstat_init(xnf_t *xnfp)
{
	int nstat = sizeof (xnf_aux_statistics) /
	    sizeof (xnf_aux_statistics[0]);
	char **cp = xnf_aux_statistics;
	kstat_named_t *knp;

	/*
	 * Create and initialise kstats.
	 */
	if ((xnfp->xnf_kstat_aux = kstat_create("xnf",
	    ddi_get_instance(xnfp->xnf_devinfo),
	    "aux_statistics", "net", KSTAT_TYPE_NAMED,
	    nstat, 0)) == NULL)
		return (B_FALSE);

	xnfp->xnf_kstat_aux->ks_private = xnfp;
	xnfp->xnf_kstat_aux->ks_update = xnf_kstat_aux_update;

	knp = xnfp->xnf_kstat_aux->ks_data;
	while (nstat > 0) {
		kstat_named_init(knp, *cp, KSTAT_DATA_UINT64);

		knp++;
		cp++;
		nstat--;
	}

	kstat_install(xnfp->xnf_kstat_aux);

	return (B_TRUE);
}

static int
xnf_stat(void *arg, uint_t stat, uint64_t *val)
{
	xnf_t *xnfp = arg;

	mutex_enter(&xnfp->xnf_intrlock);
	mutex_enter(&xnfp->xnf_txlock);

#define	mac_stat(q, r)				\
	case (MAC_STAT_##q):			\
		*val = xnfp->xnf_stat_##r;	\
		break

#define	ether_stat(q, r)			\
	case (ETHER_STAT_##q):			\
		*val = xnfp->xnf_stat_##r;	\
		break

	switch (stat) {

	mac_stat(IPACKETS, ipackets);
	mac_stat(OPACKETS, opackets);
	mac_stat(RBYTES, rbytes);
	mac_stat(OBYTES, obytes);
	mac_stat(NORCVBUF, norxbuf);
	mac_stat(IERRORS, errrx);
	mac_stat(NOXMTBUF, tx_defer);

	ether_stat(MACRCV_ERRORS, mac_rcv_error);
	ether_stat(TOOSHORT_ERRORS, runt);

	/* always claim to be in full duplex mode */
	case ETHER_STAT_LINK_DUPLEX:
		*val = LINK_DUPLEX_FULL;
		break;

	/* always claim to be at 1Gb/s link speed */
	case MAC_STAT_IFSPEED:
		*val = 1000000000ull;
		break;

	default:
		mutex_exit(&xnfp->xnf_txlock);
		mutex_exit(&xnfp->xnf_intrlock);

		return (ENOTSUP);
	}

#undef mac_stat
#undef ether_stat

	mutex_exit(&xnfp->xnf_txlock);
	mutex_exit(&xnfp->xnf_intrlock);

	return (0);
}
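
/*
 * Note (illustrative): each mac_stat()/ether_stat() line above is just
 * shorthand for a switch case; for example
 *
 *	mac_stat(IPACKETS, ipackets);
 *
 * expands to
 *
 *	case (MAC_STAT_IPACKETS):
 *		*val = xnfp->xnf_stat_ipackets;
 *		break;
 *
 * so adding a statistic needs only a matching xnf_stat_* field and one
 * extra macro line.
 */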

/*ARGSUSED*/
static void
xnf_blank(void *arg, time_t ticks, uint_t count)
{
	/*
	 * XXPV dme: blanking is not currently implemented.
	 *
	 * It's not obvious how to use the 'ticks' argument here.
	 *
	 * 'Count' might be used as an indicator of how to set
	 * rsp_event when posting receive buffers to the rx_ring.  It
	 * would replace the code at the tail of xnf_process_recv()
	 * that simply indicates that the next completed packet should
	 * cause an interrupt.
	 */
}

static void
xnf_resources(void *arg)
{
	xnf_t *xnfp = arg;
	mac_rx_fifo_t mrf;

	mrf.mrf_type = MAC_RX_FIFO;
	mrf.mrf_blank = xnf_blank;
	mrf.mrf_arg = (void *)xnfp;
	mrf.mrf_normal_blank_time = 128;	/* XXPV dme: see xnf_blank() */
	mrf.mrf_normal_pkt_count = 8;		/* XXPV dme: see xnf_blank() */

	xnfp->xnf_rx_handle = mac_resource_add(xnfp->xnf_mh,
	    (mac_resource_t *)&mrf);
}

/*ARGSUSED*/
static void
xnf_ioctl(void *arg, queue_t *q, mblk_t *mp)
{
	miocnak(q, mp, 0, EINVAL);
}

static boolean_t
xnf_getcapab(void *arg, mac_capab_t cap, void *cap_data)
{
	xnf_t *xnfp = arg;

	switch (cap) {
	case MAC_CAPAB_HCKSUM: {
		uint32_t *capab = cap_data;

		/*
		 * Whilst the flag used to communicate with the IO
		 * domain is called "NETTXF_csum_blank", the checksum
		 * in the packet must contain the pseudo-header
		 * checksum and not zero.
		 *
		 * To help out the IO domain, we might use
		 * HCKSUM_INET_PARTIAL.  Unfortunately our stack will
		 * then use checksum offload for IPv6 packets, which
		 * the IO domain can't handle.
		 *
		 * As a result, we declare ourselves capable of
		 * HCKSUM_INET_FULL_V4.  This means that we receive
		 * IPv4 packets from the stack with a blank checksum
		 * field and must insert the pseudo-header checksum
		 * before passing the packet to the IO domain.
		 */
		if (xnfp->xnf_cksum_offload)
			*capab = HCKSUM_INET_FULL_V4;
		else
			*capab = 0;
		break;
	}

	case MAC_CAPAB_POLL:
		/* Just return B_TRUE. */
		break;

	default:
		return (B_FALSE);
	}

	return (B_TRUE);
}

/*ARGSUSED*/
static void
oe_state_change(dev_info_t *dip, ddi_eventcookie_t id,
    void *arg, void *impl_data)
{
	xnf_t *xnfp = ddi_get_driver_private(dip);
	XenbusState new_state = *(XenbusState *)impl_data;

	ASSERT(xnfp != NULL);

	switch (new_state) {
	case XenbusStateConnected:
		mutex_enter(&xnfp->xnf_intrlock);
		mutex_enter(&xnfp->xnf_txlock);

		xnfp->xnf_connected = B_TRUE;
		/*
		 * Wake up threads that wanted to send data to the
		 * backend but were blocked because the backend was
		 * not yet ready.
		 */
		cv_broadcast(&xnfp->xnf_cv);

		mutex_exit(&xnfp->xnf_txlock);
		mutex_exit(&xnfp->xnf_intrlock);

		/*
		 * Kick the backend in case it missed any tx requests
		 * in the TX ring buffer.
		 */
		ec_notify_via_evtchn(xnfp->xnf_evtchn);

		/*
		 * There may already be rx data queued in the RX ring,
		 * sent by the backend after it connected but before
		 * we saw its state change here, so call our interrupt
		 * handling routine to process it, if any.
		 */
		(void) xnf_intr((caddr_t)xnfp);

		/* Mark the link as up now that we are connected. */
		mac_link_update(xnfp->xnf_mh, LINK_STATE_UP);

		break;

	default:
		break;
	}
}
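
/*
 * Note (descriptive, not from the original source): in the Xen
 * netfront/netback protocol the backend advertises optional behaviour
 * through xenstore keys in its own area -- "feature-rx-copy" below is one
 * such key -- and the frontend records its choice in its own area
 * (conventionally "request-rx-copy").  The routine that follows covers only
 * the backend half of that negotiation; the frontend's reply is presumably
 * written during ring setup, outside this excerpt.
 */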

/*
 * Check whether the backend is capable of, and willing to, talk to us
 * via hypervisor copy, as opposed to page flip.
 */
static boolean_t
xnf_hvcopy_peer_status(dev_info_t *devinfo)
{
	int be_rx_copy;
	int err;

	err = xenbus_scanf(XBT_NULL, xvdi_get_oename(devinfo),
	    "feature-rx-copy", "%d", &be_rx_copy);
	/*
	 * If we fail to read the store we assume that the key is
	 * absent, implying an older domain at the far end.  Older
	 * domains cannot do HV copy (we assume).
	 */
	if (err != 0)
		be_rx_copy = 0;

	return (be_rx_copy ? B_TRUE : B_FALSE);
}
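
/*
 * Note: a minimal sketch (not part of the original driver) of how the same
 * xenbus_scanf() pattern could be reused to probe any other optional
 * backend key; both the helper name and the "feature-sg" key mentioned in
 * the usage line are hypothetical examples.
 *
 *	static boolean_t
 *	xnf_peer_feature(dev_info_t *devinfo, char *key)
 *	{
 *		int val;
 *
 *		if (xenbus_scanf(XBT_NULL, xvdi_get_oename(devinfo),
 *		    key, "%d", &val) != 0)
 *			val = 0;
 *
 *		return (val ? B_TRUE : B_FALSE);
 *	}
 *
 * Usage would mirror xnf_hvcopy_peer_status(), e.g.
 * xnf_peer_feature(devinfo, "feature-sg").
 */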