/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

/*
 *
 * Copyright (c) 2004 Christian Limpach.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. This section intentionally left blank.
 * 4. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
/*
 * Section 3 of the above license was updated in response to bug 6379571.
57 */ 58 59 /* 60 * xnf.c - Nemo-based network driver for domU 61 */ 62 63 #include <sys/types.h> 64 #include <sys/errno.h> 65 #include <sys/param.h> 66 #include <sys/sysmacros.h> 67 #include <sys/systm.h> 68 #include <sys/stream.h> 69 #include <sys/strsubr.h> 70 #include <sys/conf.h> 71 #include <sys/ddi.h> 72 #include <sys/devops.h> 73 #include <sys/sunddi.h> 74 #include <sys/sunndi.h> 75 #include <sys/dlpi.h> 76 #include <sys/ethernet.h> 77 #include <sys/strsun.h> 78 #include <sys/pattr.h> 79 #include <inet/ip.h> 80 #include <inet/ip_impl.h> 81 #include <sys/gld.h> 82 #include <sys/modctl.h> 83 #include <sys/mac.h> 84 #include <sys/mac_ether.h> 85 #include <sys/bootinfo.h> 86 #include <sys/mach_mmu.h> 87 #ifdef XPV_HVM_DRIVER 88 #include <sys/xpv_support.h> 89 #include <sys/hypervisor.h> 90 #else 91 #include <sys/hypervisor.h> 92 #include <sys/evtchn_impl.h> 93 #include <sys/balloon_impl.h> 94 #endif 95 #include <xen/public/io/netif.h> 96 #include <sys/gnttab.h> 97 #include <xen/sys/xendev.h> 98 #include <sys/sdt.h> 99 100 #include <io/xnf.h> 101 102 103 /* 104 * Declarations and Module Linkage 105 */ 106 107 #if defined(DEBUG) || defined(__lint) 108 #define XNF_DEBUG 109 int xnfdebug = 0; 110 #endif 111 112 /* 113 * On a 32 bit PAE system physical and machine addresses are larger 114 * than 32 bits. ddi_btop() on such systems take an unsigned long 115 * argument, and so addresses above 4G are truncated before ddi_btop() 116 * gets to see them. To avoid this, code the shift operation here. 117 */ 118 #define xnf_btop(addr) ((addr) >> PAGESHIFT) 119 120 boolean_t xnf_cksum_offload = B_TRUE; 121 122 /* Default value for hypervisor-based copy operations */ 123 boolean_t xnf_rx_hvcopy = B_TRUE; 124 125 /* 126 * Should pages used for transmit be readonly for the peer? 127 */ 128 boolean_t xnf_tx_pages_readonly = B_FALSE; 129 /* 130 * Packets under this size are bcopied instead of using desballoc. 131 * Choose a value > XNF_FRAMESIZE (1514) to force the receive path to 132 * always copy. 
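 *
 * Illustrative sketch (not a separate code path, just a summary of the
 * choice made in xnf_process_recv() below) of what this tunable drives:
 *
 *	if (len <= xnf_rx_bcopy_thresh)
 *		mp = allocb(len, BPRI_MED);	copy, keep the ring buffer
 *	else
 *		mp = desballoc(bdesc->buf, ..);	loan the buffer upstream and
 *						hang a replacement instead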
133 */ 134 unsigned int xnf_rx_bcopy_thresh = 64; 135 136 unsigned int xnf_max_tx_frags = 1; 137 138 /* Required system entry points */ 139 static int xnf_attach(dev_info_t *, ddi_attach_cmd_t); 140 static int xnf_detach(dev_info_t *, ddi_detach_cmd_t); 141 142 /* Required driver entry points for Nemo */ 143 static int xnf_start(void *); 144 static void xnf_stop(void *); 145 static int xnf_set_mac_addr(void *, const uint8_t *); 146 static int xnf_set_multicast(void *, boolean_t, const uint8_t *); 147 static int xnf_set_promiscuous(void *, boolean_t); 148 static mblk_t *xnf_send(void *, mblk_t *); 149 static uint_t xnf_intr(caddr_t); 150 static int xnf_stat(void *, uint_t, uint64_t *); 151 static void xnf_blank(void *, time_t, uint_t); 152 static void xnf_resources(void *); 153 static void xnf_ioctl(void *, queue_t *, mblk_t *); 154 static boolean_t xnf_getcapab(void *, mac_capab_t, void *); 155 156 /* Driver private functions */ 157 static int xnf_alloc_dma_resources(xnf_t *); 158 static void xnf_release_dma_resources(xnf_t *); 159 static mblk_t *xnf_process_recv(xnf_t *); 160 static void xnf_rcv_complete(struct xnf_buffer_desc *); 161 static void xnf_release_mblks(xnf_t *); 162 static struct xnf_buffer_desc *xnf_alloc_tx_buffer(xnf_t *); 163 static struct xnf_buffer_desc *xnf_alloc_buffer(xnf_t *); 164 static struct xnf_buffer_desc *xnf_get_tx_buffer(xnf_t *); 165 static struct xnf_buffer_desc *xnf_get_buffer(xnf_t *); 166 static void xnf_free_buffer(struct xnf_buffer_desc *); 167 static void xnf_free_tx_buffer(struct xnf_buffer_desc *); 168 void xnf_send_driver_status(int, int); 169 static void rx_buffer_hang(xnf_t *, struct xnf_buffer_desc *); 170 static int xnf_clean_tx_ring(xnf_t *); 171 static void oe_state_change(dev_info_t *, ddi_eventcookie_t, 172 void *, void *); 173 static mblk_t *xnf_process_hvcopy_recv(xnf_t *xnfp); 174 static boolean_t xnf_hvcopy_peer_status(dev_info_t *devinfo); 175 static boolean_t xnf_kstat_init(xnf_t *xnfp); 176 177 /* 178 * XXPV dme: remove MC_IOCTL? 
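 *
 * (For reference: the mc_callbacks flags in the structure below simply
 * advertise which optional entry points are supplied, i.e. MC_RESOURCES
 * for xnf_resources(), MC_IOCTL for xnf_ioctl() and MC_GETCAPAB for
 * xnf_getcapab().)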
179 */ 180 static mac_callbacks_t xnf_callbacks = { 181 MC_RESOURCES | MC_IOCTL | MC_GETCAPAB, 182 xnf_stat, 183 xnf_start, 184 xnf_stop, 185 xnf_set_promiscuous, 186 xnf_set_multicast, 187 xnf_set_mac_addr, 188 xnf_send, 189 xnf_resources, 190 xnf_ioctl, 191 xnf_getcapab 192 }; 193 194 #define GRANT_INVALID_REF 0 195 const int xnf_rx_bufs_lowat = 4 * NET_RX_RING_SIZE; 196 const int xnf_rx_bufs_hiwat = 8 * NET_RX_RING_SIZE; /* default max */ 197 198 /* DMA attributes for network ring buffer */ 199 static ddi_dma_attr_t ringbuf_dma_attr = { 200 DMA_ATTR_V0, /* version of this structure */ 201 0, /* lowest usable address */ 202 0xffffffffffffffffULL, /* highest usable address */ 203 0x7fffffff, /* maximum DMAable byte count */ 204 MMU_PAGESIZE, /* alignment in bytes */ 205 0x7ff, /* bitmap of burst sizes */ 206 1, /* minimum transfer */ 207 0xffffffffU, /* maximum transfer */ 208 0xffffffffffffffffULL, /* maximum segment length */ 209 1, /* maximum number of segments */ 210 1, /* granularity */ 211 0, /* flags (reserved) */ 212 }; 213 214 /* DMA attributes for transmit data */ 215 static ddi_dma_attr_t tx_buffer_dma_attr = { 216 DMA_ATTR_V0, /* version of this structure */ 217 0, /* lowest usable address */ 218 0xffffffffffffffffULL, /* highest usable address */ 219 0x7fffffff, /* maximum DMAable byte count */ 220 MMU_PAGESIZE, /* alignment in bytes */ 221 0x7ff, /* bitmap of burst sizes */ 222 1, /* minimum transfer */ 223 0xffffffffU, /* maximum transfer */ 224 0xffffffffffffffffULL, /* maximum segment length */ 225 1, /* maximum number of segments */ 226 1, /* granularity */ 227 0, /* flags (reserved) */ 228 }; 229 230 /* DMA attributes for a receive buffer */ 231 static ddi_dma_attr_t rx_buffer_dma_attr = { 232 DMA_ATTR_V0, /* version of this structure */ 233 0, /* lowest usable address */ 234 0xffffffffffffffffULL, /* highest usable address */ 235 0x7fffffff, /* maximum DMAable byte count */ 236 MMU_PAGESIZE, /* alignment in bytes */ 237 0x7ff, /* bitmap of burst sizes */ 238 1, /* minimum transfer */ 239 0xffffffffU, /* maximum transfer */ 240 0xffffffffffffffffULL, /* maximum segment length */ 241 1, /* maximum number of segments */ 242 1, /* granularity */ 243 0, /* flags (reserved) */ 244 }; 245 246 /* DMA access attributes for registers and descriptors */ 247 static ddi_device_acc_attr_t accattr = { 248 DDI_DEVICE_ATTR_V0, 249 DDI_STRUCTURE_LE_ACC, /* This is a little-endian device */ 250 DDI_STRICTORDER_ACC 251 }; 252 253 /* DMA access attributes for data: NOT to be byte swapped. 
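 * (The ring descriptors above use accattr, i.e. little-endian,
 * strictly-ordered access; packet data uses this never-swap variant so
 * that payload bytes are handed to the peer exactly as the stack wrote
 * them.)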
*/ 254 static ddi_device_acc_attr_t data_accattr = { 255 DDI_DEVICE_ATTR_V0, 256 DDI_NEVERSWAP_ACC, 257 DDI_STRICTORDER_ACC 258 }; 259 260 unsigned char xnf_broadcastaddr[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; 261 int xnf_diagnose = 0; /* Patchable global for diagnostic purposes */ 262 263 DDI_DEFINE_STREAM_OPS(xnf_dev_ops, nulldev, nulldev, xnf_attach, xnf_detach, 264 nodev, NULL, D_MP, NULL, ddi_quiesce_not_supported); 265 266 static struct modldrv xnf_modldrv = { 267 &mod_driverops, 268 "Virtual Ethernet driver", 269 &xnf_dev_ops 270 }; 271 272 static struct modlinkage modlinkage = { 273 MODREV_1, &xnf_modldrv, NULL 274 }; 275 276 int 277 _init(void) 278 { 279 int r; 280 281 mac_init_ops(&xnf_dev_ops, "xnf"); 282 r = mod_install(&modlinkage); 283 if (r != DDI_SUCCESS) 284 mac_fini_ops(&xnf_dev_ops); 285 286 return (r); 287 } 288 289 int 290 _fini(void) 291 { 292 return (EBUSY); /* XXPV dme: should be removable */ 293 } 294 295 int 296 _info(struct modinfo *modinfop) 297 { 298 return (mod_info(&modlinkage, modinfop)); 299 } 300 301 static int 302 xnf_setup_rings(xnf_t *xnfp) 303 { 304 int ix, err; 305 RING_IDX i; 306 struct xnf_buffer_desc *bdesc, *rbp; 307 struct xenbus_device *xsd; 308 domid_t oeid; 309 310 oeid = xvdi_get_oeid(xnfp->xnf_devinfo); 311 xsd = xvdi_get_xsd(xnfp->xnf_devinfo); 312 313 if (xnfp->xnf_tx_ring_ref != GRANT_INVALID_REF) 314 gnttab_end_foreign_access(xnfp->xnf_tx_ring_ref, 0, 0); 315 316 err = gnttab_grant_foreign_access(oeid, 317 xnf_btop(pa_to_ma(xnfp->xnf_tx_ring_phys_addr)), 0); 318 if (err <= 0) { 319 err = -err; 320 xenbus_dev_error(xsd, err, "granting access to tx ring page"); 321 goto out; 322 } 323 xnfp->xnf_tx_ring_ref = (grant_ref_t)err; 324 325 if (xnfp->xnf_rx_ring_ref != GRANT_INVALID_REF) 326 gnttab_end_foreign_access(xnfp->xnf_rx_ring_ref, 0, 0); 327 328 err = gnttab_grant_foreign_access(oeid, 329 xnf_btop(pa_to_ma(xnfp->xnf_rx_ring_phys_addr)), 0); 330 if (err <= 0) { 331 err = -err; 332 xenbus_dev_error(xsd, err, "granting access to rx ring page"); 333 goto out; 334 } 335 xnfp->xnf_rx_ring_ref = (grant_ref_t)err; 336 337 338 mutex_enter(&xnfp->xnf_intrlock); 339 340 /* 341 * Cleanup the TX ring. We just clean up any valid tx_pktinfo structs 342 * and reset the ring. Note that this can lose packets after a resume, 343 * but we expect to stagger on. 344 */ 345 mutex_enter(&xnfp->xnf_txlock); 346 347 for (i = 0; i < xnfp->xnf_n_tx; i++) { 348 struct tx_pktinfo *txp = &xnfp->xnf_tx_pkt_info[i]; 349 350 txp->id = i + 1; 351 352 if (txp->grant_ref == GRANT_INVALID_REF) { 353 ASSERT(txp->mp == NULL); 354 ASSERT(txp->bdesc == NULL); 355 continue; 356 } 357 358 if (gnttab_query_foreign_access(txp->grant_ref) != 0) 359 panic("tx grant still in use by backend domain"); 360 361 freemsg(txp->mp); 362 txp->mp = NULL; 363 364 (void) ddi_dma_unbind_handle(txp->dma_handle); 365 366 if (txp->bdesc != NULL) { 367 xnf_free_tx_buffer(txp->bdesc); 368 txp->bdesc = NULL; 369 } 370 371 (void) gnttab_end_foreign_access_ref(txp->grant_ref, 372 xnfp->xnf_tx_pages_readonly); 373 gnttab_release_grant_reference(&xnfp->xnf_gref_tx_head, 374 txp->grant_ref); 375 txp->grant_ref = GRANT_INVALID_REF; 376 } 377 378 xnfp->xnf_tx_pkt_id_list = 0; 379 xnfp->xnf_tx_ring.rsp_cons = 0; 380 xnfp->xnf_tx_ring.req_prod_pvt = 0; 381 382 /* LINTED: constant in conditional context */ 383 SHARED_RING_INIT(xnfp->xnf_tx_ring.sring); 384 385 mutex_exit(&xnfp->xnf_txlock); 386 387 /* 388 * Rebuild the RX ring. 
We have to rebuild the RX ring because some of 389 * our pages are currently flipped out/granted so we can't just free 390 * the RX buffers. Reclaim any unprocessed recv buffers, they won't be 391 * useable anyway since the mfn's they refer to are no longer valid. 392 * Grant the backend domain access to each hung rx buffer. 393 */ 394 i = xnfp->xnf_rx_ring.rsp_cons; 395 while (i++ != xnfp->xnf_rx_ring.sring->req_prod) { 396 volatile netif_rx_request_t *rxrp; 397 398 rxrp = RING_GET_REQUEST(&xnfp->xnf_rx_ring, i); 399 ix = rxrp - RING_GET_REQUEST(&xnfp->xnf_rx_ring, 0); 400 rbp = xnfp->xnf_rxpkt_bufptr[ix]; 401 if (rbp != NULL) { 402 grant_ref_t ref = rbp->grant_ref; 403 404 ASSERT(ref != GRANT_INVALID_REF); 405 if (xnfp->xnf_rx_hvcopy) { 406 pfn_t pfn = xnf_btop(rbp->buf_phys); 407 mfn_t mfn = pfn_to_mfn(pfn); 408 409 gnttab_grant_foreign_access_ref(ref, oeid, 410 mfn, 0); 411 } else { 412 gnttab_grant_foreign_transfer_ref(ref, 413 oeid, 0); 414 } 415 rxrp->id = ix; 416 rxrp->gref = ref; 417 } 418 } 419 420 /* 421 * Reset the ring pointers to initial state. 422 * Hang buffers for any empty ring slots. 423 */ 424 xnfp->xnf_rx_ring.rsp_cons = 0; 425 xnfp->xnf_rx_ring.req_prod_pvt = 0; 426 427 /* LINTED: constant in conditional context */ 428 SHARED_RING_INIT(xnfp->xnf_rx_ring.sring); 429 430 for (i = 0; i < NET_RX_RING_SIZE; i++) { 431 xnfp->xnf_rx_ring.req_prod_pvt = i; 432 if (xnfp->xnf_rxpkt_bufptr[i] != NULL) 433 continue; 434 if ((bdesc = xnf_get_buffer(xnfp)) == NULL) 435 break; 436 rx_buffer_hang(xnfp, bdesc); 437 } 438 xnfp->xnf_rx_ring.req_prod_pvt = i; 439 /* LINTED: constant in conditional context */ 440 RING_PUSH_REQUESTS(&xnfp->xnf_rx_ring); 441 442 mutex_exit(&xnfp->xnf_intrlock); 443 444 return (0); 445 446 out: 447 if (xnfp->xnf_tx_ring_ref != GRANT_INVALID_REF) 448 gnttab_end_foreign_access(xnfp->xnf_tx_ring_ref, 0, 0); 449 xnfp->xnf_tx_ring_ref = GRANT_INVALID_REF; 450 451 if (xnfp->xnf_rx_ring_ref != GRANT_INVALID_REF) 452 gnttab_end_foreign_access(xnfp->xnf_rx_ring_ref, 0, 0); 453 xnfp->xnf_rx_ring_ref = GRANT_INVALID_REF; 454 455 return (err); 456 } 457 458 459 /* Called when the upper layers free a message we passed upstream */ 460 static void 461 xnf_copy_rcv_complete(struct xnf_buffer_desc *bdesc) 462 { 463 (void) ddi_dma_unbind_handle(bdesc->dma_handle); 464 ddi_dma_mem_free(&bdesc->acc_handle); 465 ddi_dma_free_handle(&bdesc->dma_handle); 466 kmem_free(bdesc, sizeof (*bdesc)); 467 } 468 469 470 /* 471 * Connect driver to back end, called to set up communication with 472 * back end driver both initially and on resume after restore/migrate. 
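 *
 * In outline (a sketch of the body below, not an additional code path)
 * the connection is a single xenbus transaction that publishes our ring
 * references, event channel and feature flags and then moves the
 * frontend to Connected:
 *
 *	xenbus_transaction_start(&xbt);
 *	xenbus_printf(xbt, xsname, "tx-ring-ref", "%u", xnfp->xnf_tx_ring_ref);
 *	xenbus_printf(xbt, xsname, "rx-ring-ref", "%u", xnfp->xnf_rx_ring_ref);
 *	xenbus_printf(xbt, xsname, "event-channel", "%u", xnfp->xnf_evtchn);
 *	(feature-rx-notify, feature-tx-writable, feature-no-csum-offload
 *	and request-rx-copy are written in the same way)
 *	xenbus_printf(xbt, xsname, "state", "%d", XenbusStateConnected);
 *	xenbus_transaction_end(xbt, 0);		(retried on EAGAIN)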
473 */ 474 void 475 xnf_be_connect(xnf_t *xnfp) 476 { 477 const char *message; 478 xenbus_transaction_t xbt; 479 struct xenbus_device *xsd; 480 char *xsname; 481 int err; 482 483 ASSERT(!xnfp->xnf_connected); 484 485 xsd = xvdi_get_xsd(xnfp->xnf_devinfo); 486 xsname = xvdi_get_xsname(xnfp->xnf_devinfo); 487 488 err = xnf_setup_rings(xnfp); 489 if (err != 0) { 490 cmn_err(CE_WARN, "failed to set up tx/rx rings"); 491 xenbus_dev_error(xsd, err, "setting up ring"); 492 return; 493 } 494 495 again: 496 err = xenbus_transaction_start(&xbt); 497 if (err != 0) { 498 xenbus_dev_error(xsd, EIO, "starting transaction"); 499 return; 500 } 501 502 err = xenbus_printf(xbt, xsname, "tx-ring-ref", "%u", 503 xnfp->xnf_tx_ring_ref); 504 if (err != 0) { 505 message = "writing tx ring-ref"; 506 goto abort_transaction; 507 } 508 509 err = xenbus_printf(xbt, xsname, "rx-ring-ref", "%u", 510 xnfp->xnf_rx_ring_ref); 511 if (err != 0) { 512 message = "writing rx ring-ref"; 513 goto abort_transaction; 514 } 515 516 err = xenbus_printf(xbt, xsname, "event-channel", "%u", 517 xnfp->xnf_evtchn); 518 if (err != 0) { 519 message = "writing event-channel"; 520 goto abort_transaction; 521 } 522 523 err = xenbus_printf(xbt, xsname, "feature-rx-notify", "%d", 1); 524 if (err != 0) { 525 message = "writing feature-rx-notify"; 526 goto abort_transaction; 527 } 528 529 if (!xnfp->xnf_tx_pages_readonly) { 530 err = xenbus_printf(xbt, xsname, "feature-tx-writable", 531 "%d", 1); 532 if (err != 0) { 533 message = "writing feature-tx-writable"; 534 goto abort_transaction; 535 } 536 } 537 538 err = xenbus_printf(xbt, xsname, "feature-no-csum-offload", "%d", 539 xnfp->xnf_cksum_offload ? 0 : 1); 540 if (err != 0) { 541 message = "writing feature-no-csum-offload"; 542 goto abort_transaction; 543 } 544 err = xenbus_printf(xbt, xsname, "request-rx-copy", "%d", 545 xnfp->xnf_rx_hvcopy ? 1 : 0); 546 if (err != 0) { 547 message = "writing request-rx-copy"; 548 goto abort_transaction; 549 } 550 551 err = xenbus_printf(xbt, xsname, "state", "%d", XenbusStateConnected); 552 if (err != 0) { 553 message = "writing frontend XenbusStateConnected"; 554 goto abort_transaction; 555 } 556 557 err = xenbus_transaction_end(xbt, 0); 558 if (err != 0) { 559 if (err == EAGAIN) 560 goto again; 561 xenbus_dev_error(xsd, err, "completing transaction"); 562 } 563 564 return; 565 566 abort_transaction: 567 (void) xenbus_transaction_end(xbt, 1); 568 xenbus_dev_error(xsd, err, "%s", message); 569 } 570 571 /* 572 * Read config info from xenstore 573 */ 574 void 575 xnf_read_config(xnf_t *xnfp) 576 { 577 char mac[ETHERADDRL * 3]; 578 int err, be_no_cksum_offload; 579 580 err = xenbus_scanf(XBT_NULL, xvdi_get_oename(xnfp->xnf_devinfo), "mac", 581 "%s", (char *)&mac[0]); 582 if (err != 0) { 583 /* 584 * bad: we're supposed to be set up with a proper mac 585 * addr. at this point 586 */ 587 cmn_err(CE_WARN, "%s%d: no mac address", 588 ddi_driver_name(xnfp->xnf_devinfo), 589 ddi_get_instance(xnfp->xnf_devinfo)); 590 return; 591 } 592 if (ether_aton(mac, xnfp->xnf_mac_addr) != ETHERADDRL) { 593 err = ENOENT; 594 xenbus_dev_error(xvdi_get_xsd(xnfp->xnf_devinfo), ENOENT, 595 "parsing %s/mac", xvdi_get_xsname(xnfp->xnf_devinfo)); 596 return; 597 } 598 599 err = xenbus_scanf(XBT_NULL, xvdi_get_oename(xnfp->xnf_devinfo), 600 "feature-no-csum-offload", "%d", &be_no_cksum_offload); 601 /* 602 * If we fail to read the store we assume that the key is 603 * absent, implying an older domain at the far end. Older 604 * domains always support checksum offload. 
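 *
 * The outcome of the negotiation below is therefore:
 *	- the backend wrote feature-no-csum-offload = 1: offload disabled;
 *	- we were configured with xnf_cksum_offload = B_FALSE: offload
 *	  disabled;
 *	- otherwise (key absent or 0): checksum offload stays enabled.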
605 */ 606 if (err != 0) 607 be_no_cksum_offload = 0; 608 /* 609 * If the far end cannot do checksum offload or we do not wish 610 * to do it, disable it. 611 */ 612 if ((be_no_cksum_offload == 1) || !xnfp->xnf_cksum_offload) 613 xnfp->xnf_cksum_offload = B_FALSE; 614 } 615 616 /* 617 * attach(9E) -- Attach a device to the system 618 * 619 * Called once for each board successfully probed. 620 */ 621 static int 622 xnf_attach(dev_info_t *devinfo, ddi_attach_cmd_t cmd) 623 { 624 mac_register_t *macp; 625 xnf_t *xnfp; 626 int err; 627 628 #ifdef XNF_DEBUG 629 if (xnfdebug & XNF_DEBUG_DDI) 630 printf("xnf%d: attach(0x%p)\n", ddi_get_instance(devinfo), 631 (void *)devinfo); 632 #endif 633 634 switch (cmd) { 635 case DDI_RESUME: 636 xnfp = ddi_get_driver_private(devinfo); 637 638 (void) xvdi_resume(devinfo); 639 (void) xvdi_alloc_evtchn(devinfo); 640 xnfp->xnf_evtchn = xvdi_get_evtchn(devinfo); 641 #ifdef XPV_HVM_DRIVER 642 ec_bind_evtchn_to_handler(xnfp->xnf_evtchn, IPL_VIF, xnf_intr, 643 xnfp); 644 #else 645 (void) ddi_add_intr(devinfo, 0, NULL, NULL, xnf_intr, 646 (caddr_t)xnfp); 647 #endif 648 xnf_be_connect(xnfp); 649 /* 650 * Our MAC address may have changed if we're resuming: 651 * - on a different host 652 * - on the same one and got a different MAC address 653 * because we didn't specify one of our own. 654 * so it's useful to claim that it changed in order that 655 * IP send out a gratuitous ARP. 656 */ 657 mac_unicst_update(xnfp->xnf_mh, xnfp->xnf_mac_addr); 658 return (DDI_SUCCESS); 659 660 case DDI_ATTACH: 661 break; 662 663 default: 664 return (DDI_FAILURE); 665 } 666 667 /* 668 * Allocate gld_mac_info_t and xnf_instance structures 669 */ 670 macp = mac_alloc(MAC_VERSION); 671 if (macp == NULL) 672 return (DDI_FAILURE); 673 xnfp = kmem_zalloc(sizeof (*xnfp), KM_SLEEP); 674 675 macp->m_dip = devinfo; 676 macp->m_driver = xnfp; 677 xnfp->xnf_devinfo = devinfo; 678 679 macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER; 680 macp->m_src_addr = xnfp->xnf_mac_addr; 681 macp->m_callbacks = &xnf_callbacks; 682 macp->m_min_sdu = 0; 683 macp->m_max_sdu = XNF_MAXPKT; 684 685 xnfp->xnf_running = B_FALSE; 686 xnfp->xnf_connected = B_FALSE; 687 xnfp->xnf_cksum_offload = xnf_cksum_offload; 688 xnfp->xnf_tx_pages_readonly = xnf_tx_pages_readonly; 689 xnfp->xnf_need_sched = B_FALSE; 690 691 xnfp->xnf_rx_hvcopy = xnf_hvcopy_peer_status(devinfo) && xnf_rx_hvcopy; 692 #ifdef XPV_HVM_DRIVER 693 /* 694 * Report our version to dom0. 695 */ 696 if (xenbus_printf(XBT_NULL, "hvmpv/xnf", "version", "%d", 697 HVMPV_XNF_VERS)) 698 cmn_err(CE_WARN, "xnf: couldn't write version\n"); 699 700 if (!xnfp->xnf_rx_hvcopy) { 701 cmn_err(CE_WARN, "The xnf driver requires a dom0 that " 702 "supports 'feature-rx-copy'"); 703 goto failure; 704 } 705 #endif 706 707 /* 708 * Get the iblock cookie with which to initialize the mutexes. 709 */ 710 if (ddi_get_iblock_cookie(devinfo, 0, &xnfp->xnf_icookie) 711 != DDI_SUCCESS) 712 goto failure; 713 /* 714 * Driver locking strategy: the txlock protects all paths 715 * through the driver, except the interrupt thread. 716 * If the interrupt thread needs to do something which could 717 * affect the operation of any other part of the driver, 718 * it needs to acquire the txlock mutex. 
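 *
 * A sketch of the convention used by the paths in this file that need
 * both locks (e.g. xnf_start(), xnf_stop(), xnf_setup_rings() and the
 * detach/suspend code): xnf_intrlock is always taken before xnf_txlock:
 *
 *	mutex_enter(&xnfp->xnf_intrlock);
 *	mutex_enter(&xnfp->xnf_txlock);
 *	... manipulate shared state ...
 *	mutex_exit(&xnfp->xnf_txlock);
 *	mutex_exit(&xnfp->xnf_intrlock);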
719 */ 720 mutex_init(&xnfp->xnf_tx_buf_mutex, 721 NULL, MUTEX_DRIVER, xnfp->xnf_icookie); 722 mutex_init(&xnfp->xnf_rx_buf_mutex, 723 NULL, MUTEX_DRIVER, xnfp->xnf_icookie); 724 mutex_init(&xnfp->xnf_txlock, 725 NULL, MUTEX_DRIVER, xnfp->xnf_icookie); 726 mutex_init(&xnfp->xnf_intrlock, 727 NULL, MUTEX_DRIVER, xnfp->xnf_icookie); 728 cv_init(&xnfp->xnf_cv, NULL, CV_DEFAULT, NULL); 729 730 xnfp->xnf_gref_tx_head = (grant_ref_t)-1; 731 xnfp->xnf_gref_rx_head = (grant_ref_t)-1; 732 if (gnttab_alloc_grant_references(NET_TX_RING_SIZE, 733 &xnfp->xnf_gref_tx_head) < 0) { 734 cmn_err(CE_WARN, "xnf%d: can't alloc tx grant refs", 735 ddi_get_instance(xnfp->xnf_devinfo)); 736 goto failure_1; 737 } 738 if (gnttab_alloc_grant_references(NET_RX_RING_SIZE, 739 &xnfp->xnf_gref_rx_head) < 0) { 740 cmn_err(CE_WARN, "xnf%d: can't alloc rx grant refs", 741 ddi_get_instance(xnfp->xnf_devinfo)); 742 goto failure_1; 743 } 744 if (xnf_alloc_dma_resources(xnfp) == DDI_FAILURE) { 745 cmn_err(CE_WARN, "xnf%d: failed to allocate and initialize " 746 "driver data structures", 747 ddi_get_instance(xnfp->xnf_devinfo)); 748 goto failure_1; 749 } 750 751 xnfp->xnf_rx_ring.sring->rsp_event = 752 xnfp->xnf_tx_ring.sring->rsp_event = 1; 753 754 xnfp->xnf_tx_ring_ref = GRANT_INVALID_REF; 755 xnfp->xnf_rx_ring_ref = GRANT_INVALID_REF; 756 757 /* set driver private pointer now */ 758 ddi_set_driver_private(devinfo, xnfp); 759 760 if (xvdi_add_event_handler(devinfo, XS_OE_STATE, oe_state_change, NULL) 761 != DDI_SUCCESS) 762 goto failure_1; 763 764 if (!xnf_kstat_init(xnfp)) 765 goto failure_2; 766 767 /* 768 * Allocate an event channel, add the interrupt handler and 769 * bind it to the event channel. 770 */ 771 (void) xvdi_alloc_evtchn(devinfo); 772 xnfp->xnf_evtchn = xvdi_get_evtchn(devinfo); 773 #ifdef XPV_HVM_DRIVER 774 ec_bind_evtchn_to_handler(xnfp->xnf_evtchn, IPL_VIF, xnf_intr, xnfp); 775 #else 776 (void) ddi_add_intr(devinfo, 0, NULL, NULL, xnf_intr, (caddr_t)xnfp); 777 #endif 778 779 xnf_read_config(xnfp); 780 err = mac_register(macp, &xnfp->xnf_mh); 781 mac_free(macp); 782 macp = NULL; 783 if (err != 0) 784 goto failure_3; 785 786 #ifdef XPV_HVM_DRIVER 787 /* 788 * In the HVM case, this driver essentially replaces a driver for 789 * a 'real' PCI NIC. Without the "model" property set to 790 * "Ethernet controller", like the PCI code does, netbooting does 791 * not work correctly, as strplumb_get_netdev_path() will not find 792 * this interface. 
793 */ 794 (void) ndi_prop_update_string(DDI_DEV_T_NONE, devinfo, "model", 795 "Ethernet controller"); 796 #endif 797 798 /* 799 * connect to the backend 800 */ 801 xnf_be_connect(xnfp); 802 803 return (DDI_SUCCESS); 804 805 failure_3: 806 kstat_delete(xnfp->xnf_kstat_aux); 807 #ifdef XPV_HVM_DRIVER 808 ec_unbind_evtchn(xnfp->xnf_evtchn); 809 xvdi_free_evtchn(devinfo); 810 #else 811 ddi_remove_intr(devinfo, 0, xnfp->xnf_icookie); 812 #endif 813 xnfp->xnf_evtchn = INVALID_EVTCHN; 814 815 failure_2: 816 xvdi_remove_event_handler(devinfo, XS_OE_STATE); 817 818 failure_1: 819 if (xnfp->xnf_gref_tx_head != (grant_ref_t)-1) 820 gnttab_free_grant_references(xnfp->xnf_gref_tx_head); 821 if (xnfp->xnf_gref_rx_head != (grant_ref_t)-1) 822 gnttab_free_grant_references(xnfp->xnf_gref_rx_head); 823 xnf_release_dma_resources(xnfp); 824 cv_destroy(&xnfp->xnf_cv); 825 mutex_destroy(&xnfp->xnf_rx_buf_mutex); 826 mutex_destroy(&xnfp->xnf_txlock); 827 mutex_destroy(&xnfp->xnf_intrlock); 828 829 failure: 830 kmem_free(xnfp, sizeof (*xnfp)); 831 if (macp != NULL) 832 mac_free(macp); 833 834 return (DDI_FAILURE); 835 } 836 837 /* detach(9E) -- Detach a device from the system */ 838 static int 839 xnf_detach(dev_info_t *devinfo, ddi_detach_cmd_t cmd) 840 { 841 xnf_t *xnfp; /* Our private device info */ 842 int i; 843 844 #ifdef XNF_DEBUG 845 if (xnfdebug & XNF_DEBUG_DDI) 846 printf("xnf_detach(0x%p)\n", (void *)devinfo); 847 #endif 848 849 xnfp = ddi_get_driver_private(devinfo); 850 851 switch (cmd) { 852 case DDI_SUSPEND: 853 #ifdef XPV_HVM_DRIVER 854 ec_unbind_evtchn(xnfp->xnf_evtchn); 855 xvdi_free_evtchn(devinfo); 856 #else 857 ddi_remove_intr(devinfo, 0, xnfp->xnf_icookie); 858 #endif 859 860 xvdi_suspend(devinfo); 861 862 mutex_enter(&xnfp->xnf_intrlock); 863 mutex_enter(&xnfp->xnf_txlock); 864 865 xnfp->xnf_evtchn = INVALID_EVTCHN; 866 xnfp->xnf_connected = B_FALSE; 867 mutex_exit(&xnfp->xnf_txlock); 868 mutex_exit(&xnfp->xnf_intrlock); 869 870 /* claim link to be down after disconnect */ 871 mac_link_update(xnfp->xnf_mh, LINK_STATE_DOWN); 872 return (DDI_SUCCESS); 873 874 case DDI_DETACH: 875 break; 876 877 default: 878 return (DDI_FAILURE); 879 } 880 881 if (xnfp->xnf_connected) 882 return (DDI_FAILURE); 883 884 /* Wait for receive buffers to be returned; give up after 5 seconds */ 885 i = 50; 886 887 mutex_enter(&xnfp->xnf_rx_buf_mutex); 888 while (xnfp->xnf_rx_bufs_outstanding > 0) { 889 mutex_exit(&xnfp->xnf_rx_buf_mutex); 890 delay(drv_usectohz(100000)); 891 if (--i == 0) { 892 cmn_err(CE_WARN, 893 "xnf%d: never reclaimed all the " 894 "receive buffers. 
Still have %d " 895 "buffers outstanding.", 896 ddi_get_instance(xnfp->xnf_devinfo), 897 xnfp->xnf_rx_bufs_outstanding); 898 return (DDI_FAILURE); 899 } 900 mutex_enter(&xnfp->xnf_rx_buf_mutex); 901 } 902 mutex_exit(&xnfp->xnf_rx_buf_mutex); 903 904 if (mac_unregister(xnfp->xnf_mh) != 0) 905 return (DDI_FAILURE); 906 907 kstat_delete(xnfp->xnf_kstat_aux); 908 909 /* Stop the receiver */ 910 xnf_stop(xnfp); 911 912 xvdi_remove_event_handler(devinfo, XS_OE_STATE); 913 914 /* Remove the interrupt */ 915 #ifdef XPV_HVM_DRIVER 916 ec_unbind_evtchn(xnfp->xnf_evtchn); 917 xvdi_free_evtchn(devinfo); 918 #else 919 ddi_remove_intr(devinfo, 0, xnfp->xnf_icookie); 920 #endif 921 922 /* Release any pending xmit mblks */ 923 xnf_release_mblks(xnfp); 924 925 /* Release all DMA resources */ 926 xnf_release_dma_resources(xnfp); 927 928 cv_destroy(&xnfp->xnf_cv); 929 mutex_destroy(&xnfp->xnf_rx_buf_mutex); 930 mutex_destroy(&xnfp->xnf_txlock); 931 mutex_destroy(&xnfp->xnf_intrlock); 932 933 kmem_free(xnfp, sizeof (*xnfp)); 934 935 return (DDI_SUCCESS); 936 } 937 938 /* 939 * xnf_set_mac_addr() -- set the physical network address on the board. 940 */ 941 /*ARGSUSED*/ 942 static int 943 xnf_set_mac_addr(void *arg, const uint8_t *macaddr) 944 { 945 xnf_t *xnfp = arg; 946 947 #ifdef XNF_DEBUG 948 if (xnfdebug & XNF_DEBUG_TRACE) 949 printf("xnf%d: set_mac_addr(0x%p): " 950 "%02x:%02x:%02x:%02x:%02x:%02x\n", 951 ddi_get_instance(xnfp->xnf_devinfo), 952 (void *)xnfp, macaddr[0], macaddr[1], macaddr[2], 953 macaddr[3], macaddr[4], macaddr[5]); 954 #endif 955 /* 956 * We can't set our macaddr. 957 * 958 * XXPV dme: Why not? 959 */ 960 return (ENOTSUP); 961 } 962 963 /* 964 * xnf_set_multicast() -- set (enable) or disable a multicast address. 965 * 966 * Program the hardware to enable/disable the multicast address 967 * in "mcast". Enable if "add" is true, disable if false. 968 */ 969 /*ARGSUSED*/ 970 static int 971 xnf_set_multicast(void *arg, boolean_t add, const uint8_t *mca) 972 { 973 xnf_t *xnfp = arg; 974 975 #ifdef XNF_DEBUG 976 if (xnfdebug & XNF_DEBUG_TRACE) 977 printf("xnf%d set_multicast(0x%p): " 978 "%02x:%02x:%02x:%02x:%02x:%02x\n", 979 ddi_get_instance(xnfp->xnf_devinfo), 980 (void *)xnfp, mca[0], mca[1], mca[2], 981 mca[3], mca[4], mca[5]); 982 #endif 983 984 /* 985 * XXPV dme: Ideally we'd relay the address to the backend for 986 * enabling. The protocol doesn't support that (interesting 987 * extension), so we simply succeed and hope that the relevant 988 * packets are going to arrive. 989 * 990 * If protocol support is added for enable/disable then we'll 991 * need to keep a list of those in use and re-add on resume. 992 */ 993 return (0); 994 } 995 996 /* 997 * xnf_set_promiscuous() -- set or reset promiscuous mode on the board 998 * 999 * Program the hardware to enable/disable promiscuous mode. 1000 */ 1001 /*ARGSUSED*/ 1002 static int 1003 xnf_set_promiscuous(void *arg, boolean_t on) 1004 { 1005 xnf_t *xnfp = arg; 1006 1007 #ifdef XNF_DEBUG 1008 if (xnfdebug & XNF_DEBUG_TRACE) 1009 printf("xnf%d set_promiscuous(0x%p, %x)\n", 1010 ddi_get_instance(xnfp->xnf_devinfo), 1011 (void *)xnfp, on); 1012 #endif 1013 /* 1014 * We can't really do this, but we pretend that we can in 1015 * order that snoop will work. 1016 */ 1017 return (0); 1018 } 1019 1020 /* 1021 * Clean buffers that we have responses for from the transmit ring. 
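 *
 * (For reference: free transmit slots are kept on a list threaded
 * through the id field of each tx_pktinfo, so releasing a slot below is
 * just
 *
 *	reap->id = xnfp->xnf_tx_pkt_id_list;
 *	xnfp->xnf_tx_pkt_id_list = id;
 *
 * and xnf_send_one() pops the head of that list when it needs a slot.)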
1022 */ 1023 static int 1024 xnf_clean_tx_ring(xnf_t *xnfp) 1025 { 1026 RING_IDX next_resp, i; 1027 struct tx_pktinfo *reap; 1028 int id; 1029 grant_ref_t ref; 1030 boolean_t work_to_do; 1031 1032 ASSERT(MUTEX_HELD(&xnfp->xnf_txlock)); 1033 1034 loop: 1035 while (RING_HAS_UNCONSUMED_RESPONSES(&xnfp->xnf_tx_ring)) { 1036 /* 1037 * index of next transmission ack 1038 */ 1039 next_resp = xnfp->xnf_tx_ring.sring->rsp_prod; 1040 membar_consumer(); 1041 /* 1042 * Clean tx packets from ring that we have responses for 1043 */ 1044 for (i = xnfp->xnf_tx_ring.rsp_cons; i != next_resp; i++) { 1045 id = RING_GET_RESPONSE(&xnfp->xnf_tx_ring, i)->id; 1046 reap = &xnfp->xnf_tx_pkt_info[id]; 1047 ref = reap->grant_ref; 1048 /* 1049 * Return id to free list 1050 */ 1051 reap->id = xnfp->xnf_tx_pkt_id_list; 1052 xnfp->xnf_tx_pkt_id_list = id; 1053 if (gnttab_query_foreign_access(ref) != 0) 1054 panic("tx grant still in use " 1055 "by backend domain"); 1056 (void) ddi_dma_unbind_handle(reap->dma_handle); 1057 (void) gnttab_end_foreign_access_ref(ref, 1058 xnfp->xnf_tx_pages_readonly); 1059 gnttab_release_grant_reference(&xnfp->xnf_gref_tx_head, 1060 ref); 1061 freemsg(reap->mp); 1062 reap->mp = NULL; 1063 reap->grant_ref = GRANT_INVALID_REF; 1064 if (reap->bdesc != NULL) 1065 xnf_free_tx_buffer(reap->bdesc); 1066 reap->bdesc = NULL; 1067 } 1068 xnfp->xnf_tx_ring.rsp_cons = next_resp; 1069 membar_enter(); 1070 } 1071 1072 /* LINTED: constant in conditional context */ 1073 RING_FINAL_CHECK_FOR_RESPONSES(&xnfp->xnf_tx_ring, work_to_do); 1074 if (work_to_do) 1075 goto loop; 1076 1077 return (RING_FREE_REQUESTS(&xnfp->xnf_tx_ring)); 1078 } 1079 1080 /* 1081 * If we need to pull up data from either a packet that crosses a page 1082 * boundary or consisting of multiple mblks, do it here. We allocate 1083 * a page aligned buffer and copy the data into it. The header for the 1084 * allocated buffer is returned. (which is also allocated here) 1085 */ 1086 static struct xnf_buffer_desc * 1087 xnf_pullupmsg(xnf_t *xnfp, mblk_t *mp) 1088 { 1089 struct xnf_buffer_desc *bdesc; 1090 mblk_t *mptr; 1091 caddr_t bp; 1092 int len; 1093 1094 /* 1095 * get a xmit buffer from the xmit buffer pool 1096 */ 1097 mutex_enter(&xnfp->xnf_rx_buf_mutex); 1098 bdesc = xnf_get_tx_buffer(xnfp); 1099 mutex_exit(&xnfp->xnf_rx_buf_mutex); 1100 if (bdesc == NULL) 1101 return (bdesc); 1102 /* 1103 * Copy the data into the buffer 1104 */ 1105 xnfp->xnf_stat_tx_pullup++; 1106 bp = bdesc->buf; 1107 for (mptr = mp; mptr != NULL; mptr = mptr->b_cont) { 1108 len = mptr->b_wptr - mptr->b_rptr; 1109 bcopy(mptr->b_rptr, bp, len); 1110 bp += len; 1111 } 1112 return (bdesc); 1113 } 1114 1115 void 1116 xnf_pseudo_cksum(caddr_t buf, int length) 1117 { 1118 struct ether_header *ehp; 1119 uint16_t sap, len, *stuff; 1120 uint32_t cksum; 1121 size_t offset; 1122 ipha_t *ipha; 1123 ipaddr_t src, dst; 1124 1125 ASSERT(length >= sizeof (*ehp)); 1126 ehp = (struct ether_header *)buf; 1127 1128 if (ntohs(ehp->ether_type) == VLAN_TPID) { 1129 struct ether_vlan_header *evhp; 1130 1131 ASSERT(length >= sizeof (*evhp)); 1132 evhp = (struct ether_vlan_header *)buf; 1133 sap = ntohs(evhp->ether_type); 1134 offset = sizeof (*evhp); 1135 } else { 1136 sap = ntohs(ehp->ether_type); 1137 offset = sizeof (*ehp); 1138 } 1139 1140 ASSERT(sap == ETHERTYPE_IP); 1141 1142 /* Packet should have been pulled up by the caller. 
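 *
 * What is computed below is the usual TCP/UDP pseudo-header sum (source
 * and destination address, protocol and payload length) folded into 16
 * bits and stored in the checksum field, leaving the peer to fold in the
 * rest (see the NETTXF_csum_blank handling in xnf_send_one()); roughly:
 *
 *	sum   = proto + src_hi + src_lo + dst_hi + dst_lo + htons(len);
 *	sum   = (sum >> 16) + (sum & 0xffff);	fold the carries, twice
 *	field = sum ? sum : ~sum;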
*/ 1143 if ((offset + sizeof (ipha_t)) > length) { 1144 cmn_err(CE_WARN, "xnf_pseudo_cksum: no room for checksum"); 1145 return; 1146 } 1147 1148 ipha = (ipha_t *)(buf + offset); 1149 1150 ASSERT(IPH_HDR_LENGTH(ipha) == IP_SIMPLE_HDR_LENGTH); 1151 1152 len = ntohs(ipha->ipha_length) - IP_SIMPLE_HDR_LENGTH; 1153 1154 switch (ipha->ipha_protocol) { 1155 case IPPROTO_TCP: 1156 stuff = IPH_TCPH_CHECKSUMP(ipha, IP_SIMPLE_HDR_LENGTH); 1157 cksum = IP_TCP_CSUM_COMP; 1158 break; 1159 case IPPROTO_UDP: 1160 stuff = IPH_UDPH_CHECKSUMP(ipha, IP_SIMPLE_HDR_LENGTH); 1161 cksum = IP_UDP_CSUM_COMP; 1162 break; 1163 default: 1164 cmn_err(CE_WARN, "xnf_pseudo_cksum: unexpected protocol %d", 1165 ipha->ipha_protocol); 1166 return; 1167 } 1168 1169 src = ipha->ipha_src; 1170 dst = ipha->ipha_dst; 1171 1172 cksum += (dst >> 16) + (dst & 0xFFFF); 1173 cksum += (src >> 16) + (src & 0xFFFF); 1174 cksum += htons(len); 1175 1176 cksum = (cksum >> 16) + (cksum & 0xFFFF); 1177 cksum = (cksum >> 16) + (cksum & 0xFFFF); 1178 1179 ASSERT(cksum <= 0xFFFF); 1180 1181 *stuff = (uint16_t)(cksum ? cksum : ~cksum); 1182 } 1183 1184 /* 1185 * xnf_send_one() -- send a packet 1186 * 1187 * Called when a packet is ready to be transmitted. A pointer to an 1188 * M_DATA message that contains the packet is passed to this routine. 1189 * At least the complete LLC header is contained in the message's 1190 * first message block, and the remainder of the packet is contained 1191 * within additional M_DATA message blocks linked to the first 1192 * message block. 1193 * 1194 */ 1195 static boolean_t 1196 xnf_send_one(xnf_t *xnfp, mblk_t *mp) 1197 { 1198 struct xnf_buffer_desc *xmitbuf; 1199 struct tx_pktinfo *txp_info; 1200 mblk_t *mptr; 1201 ddi_dma_cookie_t dma_cookie; 1202 RING_IDX slot; 1203 int length = 0, i, pktlen = 0, rc, tx_id; 1204 int tx_ring_freespace, page_oops; 1205 uint_t ncookies; 1206 volatile netif_tx_request_t *txrp; 1207 caddr_t bufaddr; 1208 grant_ref_t ref; 1209 unsigned long mfn; 1210 uint32_t pflags; 1211 domid_t oeid; 1212 1213 #ifdef XNF_DEBUG 1214 if (xnfdebug & XNF_DEBUG_SEND) 1215 printf("xnf%d send(0x%p, 0x%p)\n", 1216 ddi_get_instance(xnfp->xnf_devinfo), 1217 (void *)xnfp, (void *)mp); 1218 #endif 1219 1220 ASSERT(mp != NULL); 1221 ASSERT(mp->b_next == NULL); 1222 ASSERT(MUTEX_HELD(&xnfp->xnf_txlock)); 1223 1224 tx_ring_freespace = xnf_clean_tx_ring(xnfp); 1225 ASSERT(tx_ring_freespace >= 0); 1226 1227 oeid = xvdi_get_oeid(xnfp->xnf_devinfo); 1228 xnfp->xnf_stat_tx_attempt++; 1229 /* 1230 * If there are no xmit ring slots available, return. 
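 * (The caller, xnf_send(), keeps the mblk and stops walking the chain
 *  when this routine returns B_FALSE, so a full ring defers the packet
 *  rather than dropping it; B_TRUE means the mblk has been consumed.)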
1231 */ 1232 if (tx_ring_freespace == 0) { 1233 xnfp->xnf_stat_tx_defer++; 1234 return (B_FALSE); /* Send should be retried */ 1235 } 1236 1237 slot = xnfp->xnf_tx_ring.req_prod_pvt; 1238 /* Count the number of mblks in message and compute packet size */ 1239 for (i = 0, mptr = mp; mptr != NULL; mptr = mptr->b_cont, i++) 1240 pktlen += (mptr->b_wptr - mptr->b_rptr); 1241 1242 /* Make sure packet isn't too large */ 1243 if (pktlen > XNF_FRAMESIZE) { 1244 cmn_err(CE_WARN, "xnf%d: oversized packet (%d bytes) dropped", 1245 ddi_get_instance(xnfp->xnf_devinfo), pktlen); 1246 freemsg(mp); 1247 return (B_TRUE); 1248 } 1249 1250 /* 1251 * Test if we cross a page boundary with our buffer 1252 */ 1253 page_oops = (i == 1) && 1254 (xnf_btop((size_t)mp->b_rptr) != 1255 xnf_btop((size_t)(mp->b_rptr + pktlen))); 1256 /* 1257 * XXPV - unfortunately, the Xen virtual net device currently 1258 * doesn't support multiple packet frags, so this will always 1259 * end up doing the pullup if we got more than one packet. 1260 */ 1261 if (i > xnf_max_tx_frags || page_oops) { 1262 if (page_oops) 1263 xnfp->xnf_stat_tx_pagebndry++; 1264 if ((xmitbuf = xnf_pullupmsg(xnfp, mp)) == NULL) { 1265 /* could not allocate resources? */ 1266 #ifdef XNF_DEBUG 1267 cmn_err(CE_WARN, "xnf%d: pullupmsg failed", 1268 ddi_get_instance(xnfp->xnf_devinfo)); 1269 #endif 1270 xnfp->xnf_stat_tx_defer++; 1271 return (B_FALSE); /* Retry send */ 1272 } 1273 bufaddr = xmitbuf->buf; 1274 } else { 1275 xmitbuf = NULL; 1276 bufaddr = (caddr_t)mp->b_rptr; 1277 } 1278 1279 /* set up data descriptor */ 1280 length = pktlen; 1281 1282 /* 1283 * Get packet id from free list 1284 */ 1285 tx_id = xnfp->xnf_tx_pkt_id_list; 1286 ASSERT(tx_id < NET_TX_RING_SIZE); 1287 txp_info = &xnfp->xnf_tx_pkt_info[tx_id]; 1288 xnfp->xnf_tx_pkt_id_list = txp_info->id; 1289 txp_info->id = tx_id; 1290 1291 /* Prepare for DMA mapping of tx buffer(s) */ 1292 rc = ddi_dma_addr_bind_handle(txp_info->dma_handle, 1293 NULL, bufaddr, length, DDI_DMA_WRITE | DDI_DMA_STREAMING, 1294 DDI_DMA_DONTWAIT, 0, &dma_cookie, &ncookies); 1295 if (rc != DDI_DMA_MAPPED) { 1296 ASSERT(rc != DDI_DMA_INUSE); 1297 ASSERT(rc != DDI_DMA_PARTIAL_MAP); 1298 /* 1299 * Return id to free list 1300 */ 1301 txp_info->id = xnfp->xnf_tx_pkt_id_list; 1302 xnfp->xnf_tx_pkt_id_list = tx_id; 1303 if (rc == DDI_DMA_NORESOURCES) { 1304 xnfp->xnf_stat_tx_defer++; 1305 return (B_FALSE); /* Retry later */ 1306 } 1307 #ifdef XNF_DEBUG 1308 cmn_err(CE_WARN, "xnf%d: bind_handle failed (%x)", 1309 ddi_get_instance(xnfp->xnf_devinfo), rc); 1310 #endif 1311 return (B_FALSE); 1312 } 1313 1314 ASSERT(ncookies == 1); 1315 ref = gnttab_claim_grant_reference(&xnfp->xnf_gref_tx_head); 1316 ASSERT((signed short)ref >= 0); 1317 mfn = xnf_btop(pa_to_ma((paddr_t)dma_cookie.dmac_laddress)); 1318 gnttab_grant_foreign_access_ref(ref, oeid, mfn, 1319 xnfp->xnf_tx_pages_readonly); 1320 txp_info->grant_ref = ref; 1321 txrp = RING_GET_REQUEST(&xnfp->xnf_tx_ring, slot); 1322 txrp->gref = ref; 1323 txrp->size = dma_cookie.dmac_size; 1324 txrp->offset = (uintptr_t)bufaddr & PAGEOFFSET; 1325 txrp->id = tx_id; 1326 txrp->flags = 0; 1327 hcksum_retrieve(mp, NULL, NULL, NULL, NULL, NULL, NULL, &pflags); 1328 if (pflags != 0) { 1329 ASSERT(xnfp->xnf_cksum_offload); 1330 /* 1331 * If the local protocol stack requests checksum 1332 * offload we set the 'checksum blank' flag, 1333 * indicating to the peer that we need the checksum 1334 * calculated for us. 
1335 * 1336 * We _don't_ set the validated flag, because we haven't 1337 * validated that the data and the checksum match. 1338 */ 1339 xnf_pseudo_cksum(bufaddr, length); 1340 txrp->flags |= NETTXF_csum_blank; 1341 xnfp->xnf_stat_tx_cksum_deferred++; 1342 } 1343 membar_producer(); 1344 xnfp->xnf_tx_ring.req_prod_pvt = slot + 1; 1345 1346 txp_info->mp = mp; 1347 txp_info->bdesc = xmitbuf; 1348 1349 xnfp->xnf_stat_opackets++; 1350 xnfp->xnf_stat_obytes += pktlen; 1351 1352 return (B_TRUE); /* successful transmit attempt */ 1353 } 1354 1355 mblk_t * 1356 xnf_send(void *arg, mblk_t *mp) 1357 { 1358 xnf_t *xnfp = arg; 1359 mblk_t *next; 1360 boolean_t sent_something = B_FALSE; 1361 1362 mutex_enter(&xnfp->xnf_txlock); 1363 1364 /* 1365 * Transmission attempts should be impossible without having 1366 * previously called xnf_start(). 1367 */ 1368 ASSERT(xnfp->xnf_running); 1369 1370 /* 1371 * Wait for getting connected to the backend 1372 */ 1373 while (!xnfp->xnf_connected) { 1374 cv_wait(&xnfp->xnf_cv, &xnfp->xnf_txlock); 1375 } 1376 1377 while (mp != NULL) { 1378 next = mp->b_next; 1379 mp->b_next = NULL; 1380 1381 if (!xnf_send_one(xnfp, mp)) { 1382 mp->b_next = next; 1383 break; 1384 } 1385 1386 mp = next; 1387 sent_something = B_TRUE; 1388 } 1389 1390 if (sent_something) { 1391 boolean_t notify; 1392 1393 /* LINTED: constant in conditional context */ 1394 RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&xnfp->xnf_tx_ring, 1395 notify); 1396 if (notify) 1397 ec_notify_via_evtchn(xnfp->xnf_evtchn); 1398 } 1399 1400 xnfp->xnf_need_sched = !sent_something; 1401 1402 mutex_exit(&xnfp->xnf_txlock); 1403 1404 return (mp); 1405 } 1406 1407 /* 1408 * xnf_intr() -- ring interrupt service routine 1409 */ 1410 static uint_t 1411 xnf_intr(caddr_t arg) 1412 { 1413 xnf_t *xnfp = (xnf_t *)arg; 1414 boolean_t sched = B_FALSE; 1415 1416 mutex_enter(&xnfp->xnf_intrlock); 1417 1418 /* spurious intr */ 1419 if (!xnfp->xnf_connected) { 1420 mutex_exit(&xnfp->xnf_intrlock); 1421 xnfp->xnf_stat_unclaimed_interrupts++; 1422 return (DDI_INTR_UNCLAIMED); 1423 } 1424 1425 #ifdef XNF_DEBUG 1426 if (xnfdebug & XNF_DEBUG_INT) 1427 printf("xnf%d intr(0x%p)\n", 1428 ddi_get_instance(xnfp->xnf_devinfo), (void *)xnfp); 1429 #endif 1430 if (RING_HAS_UNCONSUMED_RESPONSES(&xnfp->xnf_rx_ring)) { 1431 mblk_t *mp; 1432 1433 if (xnfp->xnf_rx_hvcopy) 1434 mp = xnf_process_hvcopy_recv(xnfp); 1435 else 1436 mp = xnf_process_recv(xnfp); 1437 1438 if (mp != NULL) 1439 mac_rx(xnfp->xnf_mh, xnfp->xnf_rx_handle, mp); 1440 } 1441 1442 xnfp->xnf_stat_interrupts++; 1443 mutex_exit(&xnfp->xnf_intrlock); 1444 1445 /* 1446 * Clean tx ring and try to start any blocked xmit streams if 1447 * there is now some space. 1448 */ 1449 mutex_enter(&xnfp->xnf_txlock); 1450 if (xnf_clean_tx_ring(xnfp) > 0) { 1451 sched = xnfp->xnf_need_sched; 1452 xnfp->xnf_need_sched = B_FALSE; 1453 } 1454 mutex_exit(&xnfp->xnf_txlock); 1455 1456 if (sched) 1457 mac_tx_update(xnfp->xnf_mh); 1458 1459 return (DDI_INTR_CLAIMED); 1460 } 1461 1462 /* 1463 * xnf_start() -- start the board receiving and enable interrupts. 1464 */ 1465 static int 1466 xnf_start(void *arg) 1467 { 1468 xnf_t *xnfp = arg; 1469 1470 #ifdef XNF_DEBUG 1471 if (xnfdebug & XNF_DEBUG_TRACE) 1472 printf("xnf%d start(0x%p)\n", 1473 ddi_get_instance(xnfp->xnf_devinfo), (void *)xnfp); 1474 #endif 1475 1476 mutex_enter(&xnfp->xnf_intrlock); 1477 mutex_enter(&xnfp->xnf_txlock); 1478 1479 /* Accept packets from above. 
*/ 1480 xnfp->xnf_running = B_TRUE; 1481 1482 mutex_exit(&xnfp->xnf_txlock); 1483 mutex_exit(&xnfp->xnf_intrlock); 1484 1485 return (0); 1486 } 1487 1488 /* xnf_stop() - disable hardware */ 1489 static void 1490 xnf_stop(void *arg) 1491 { 1492 xnf_t *xnfp = arg; 1493 1494 #ifdef XNF_DEBUG 1495 if (xnfdebug & XNF_DEBUG_TRACE) 1496 printf("xnf%d stop(0x%p)\n", 1497 ddi_get_instance(xnfp->xnf_devinfo), (void *)xnfp); 1498 #endif 1499 1500 mutex_enter(&xnfp->xnf_intrlock); 1501 mutex_enter(&xnfp->xnf_txlock); 1502 1503 xnfp->xnf_running = B_FALSE; 1504 1505 mutex_exit(&xnfp->xnf_txlock); 1506 mutex_exit(&xnfp->xnf_intrlock); 1507 } 1508 1509 /* 1510 * Driver private functions follow 1511 */ 1512 1513 /* 1514 * Hang buffer on rx ring 1515 */ 1516 static void 1517 rx_buffer_hang(xnf_t *xnfp, struct xnf_buffer_desc *bdesc) 1518 { 1519 volatile netif_rx_request_t *reqp; 1520 RING_IDX hang_ix; 1521 grant_ref_t ref; 1522 domid_t oeid; 1523 1524 oeid = xvdi_get_oeid(xnfp->xnf_devinfo); 1525 1526 ASSERT(MUTEX_HELD(&xnfp->xnf_intrlock)); 1527 reqp = RING_GET_REQUEST(&xnfp->xnf_rx_ring, 1528 xnfp->xnf_rx_ring.req_prod_pvt); 1529 hang_ix = (RING_IDX) (reqp - RING_GET_REQUEST(&xnfp->xnf_rx_ring, 0)); 1530 ASSERT(xnfp->xnf_rxpkt_bufptr[hang_ix] == NULL); 1531 if (bdesc->grant_ref == GRANT_INVALID_REF) { 1532 ref = gnttab_claim_grant_reference(&xnfp->xnf_gref_rx_head); 1533 ASSERT((signed short)ref >= 0); 1534 bdesc->grant_ref = ref; 1535 if (xnfp->xnf_rx_hvcopy) { 1536 pfn_t pfn = xnf_btop(bdesc->buf_phys); 1537 mfn_t mfn = pfn_to_mfn(pfn); 1538 1539 gnttab_grant_foreign_access_ref(ref, oeid, mfn, 0); 1540 } else { 1541 gnttab_grant_foreign_transfer_ref(ref, oeid, 0); 1542 } 1543 } 1544 reqp->id = hang_ix; 1545 reqp->gref = bdesc->grant_ref; 1546 bdesc->id = hang_ix; 1547 xnfp->xnf_rxpkt_bufptr[hang_ix] = bdesc; 1548 membar_producer(); 1549 xnfp->xnf_rx_ring.req_prod_pvt++; 1550 } 1551 1552 static mblk_t * 1553 xnf_process_hvcopy_recv(xnf_t *xnfp) 1554 { 1555 netif_rx_response_t *rxpkt; 1556 mblk_t *mp, *head, *tail; 1557 struct xnf_buffer_desc *bdesc; 1558 boolean_t hwcsum = B_FALSE, notify, work_to_do; 1559 size_t len; 1560 1561 /* 1562 * in loop over unconsumed responses, we do: 1563 * 1. get a response 1564 * 2. take corresponding buffer off recv. ring 1565 * 3. indicate this by setting slot to NULL 1566 * 4. create a new message and 1567 * 5. copy data in, adjust ptr 1568 * 1569 * outside loop: 1570 * 7. make sure no more data has arrived; kick HV 1571 */ 1572 1573 head = tail = NULL; 1574 1575 loop: 1576 while (RING_HAS_UNCONSUMED_RESPONSES(&xnfp->xnf_rx_ring)) { 1577 1578 /* 1. */ 1579 rxpkt = RING_GET_RESPONSE(&xnfp->xnf_rx_ring, 1580 xnfp->xnf_rx_ring.rsp_cons); 1581 1582 DTRACE_PROBE4(got_PKT, int, (int)rxpkt->id, int, 1583 (int)rxpkt->offset, 1584 int, (int)rxpkt->flags, int, (int)rxpkt->status); 1585 1586 /* 1587 * 2. 
1588 * Take buffer off of receive ring 1589 */ 1590 hwcsum = B_FALSE; 1591 bdesc = xnfp->xnf_rxpkt_bufptr[rxpkt->id]; 1592 /* 3 */ 1593 xnfp->xnf_rxpkt_bufptr[rxpkt->id] = NULL; 1594 ASSERT(bdesc->id == rxpkt->id); 1595 mp = NULL; 1596 if (!xnfp->xnf_running) { 1597 DTRACE_PROBE4(pkt_dropped, int, rxpkt->status, 1598 char *, bdesc->buf, int, rxpkt->offset, 1599 char *, ((char *)bdesc->buf) + rxpkt->offset); 1600 xnfp->xnf_stat_drop++; 1601 /* 1602 * re-hang the buffer 1603 */ 1604 rx_buffer_hang(xnfp, bdesc); 1605 } else if (rxpkt->status <= 0) { 1606 DTRACE_PROBE4(pkt_status_negative, int, rxpkt->status, 1607 char *, bdesc->buf, int, rxpkt->offset, 1608 char *, ((char *)bdesc->buf) + rxpkt->offset); 1609 xnfp->xnf_stat_errrx++; 1610 if (rxpkt->status == 0) 1611 xnfp->xnf_stat_runt++; 1612 if (rxpkt->status == NETIF_RSP_ERROR) 1613 xnfp->xnf_stat_mac_rcv_error++; 1614 if (rxpkt->status == NETIF_RSP_DROPPED) 1615 xnfp->xnf_stat_norxbuf++; 1616 /* 1617 * re-hang the buffer 1618 */ 1619 rx_buffer_hang(xnfp, bdesc); 1620 } else { 1621 grant_ref_t ref = bdesc->grant_ref; 1622 struct xnf_buffer_desc *new_bdesc; 1623 unsigned long off = rxpkt->offset; 1624 1625 DTRACE_PROBE4(pkt_status_ok, int, rxpkt->status, 1626 char *, bdesc->buf, int, rxpkt->offset, 1627 char *, ((char *)bdesc->buf) + rxpkt->offset); 1628 len = rxpkt->status; 1629 ASSERT(off + len <= PAGEOFFSET); 1630 if (ref == GRANT_INVALID_REF) { 1631 mp = NULL; 1632 new_bdesc = bdesc; 1633 cmn_err(CE_WARN, "Bad rx grant reference %d " 1634 "from dom %d", ref, 1635 xvdi_get_oeid(xnfp->xnf_devinfo)); 1636 goto luckless; 1637 } 1638 /* 1639 * Release ref which we'll be re-claiming in 1640 * rx_buffer_hang(). 1641 */ 1642 bdesc->grant_ref = GRANT_INVALID_REF; 1643 (void) gnttab_end_foreign_access_ref(ref, 0); 1644 gnttab_release_grant_reference(&xnfp->xnf_gref_rx_head, 1645 ref); 1646 if (rxpkt->flags & NETRXF_data_validated) 1647 hwcsum = B_TRUE; 1648 1649 /* 1650 * XXPV for the initial implementation of HVcopy, 1651 * create a new msg and copy in the data 1652 */ 1653 /* 4. */ 1654 if ((mp = allocb(len, BPRI_MED)) == NULL) { 1655 /* 1656 * Couldn't get buffer to copy to, 1657 * drop this data, and re-hang 1658 * the buffer on the ring. 1659 */ 1660 xnfp->xnf_stat_norxbuf++; 1661 DTRACE_PROBE(alloc_nix); 1662 } else { 1663 /* 5. */ 1664 DTRACE_PROBE(alloc_ok); 1665 bcopy(bdesc->buf + off, mp->b_wptr, 1666 len); 1667 mp->b_wptr += len; 1668 } 1669 new_bdesc = bdesc; 1670 luckless: 1671 1672 /* Re-hang old or hang new buffer. */ 1673 rx_buffer_hang(xnfp, new_bdesc); 1674 } 1675 if (mp) { 1676 if (hwcsum) { 1677 /* 1678 * See comments in xnf_process_recv(). 1679 */ 1680 1681 (void) hcksum_assoc(mp, NULL, 1682 NULL, 0, 0, 0, 0, 1683 HCK_FULLCKSUM | 1684 HCK_FULLCKSUM_OK, 1685 0); 1686 xnfp->xnf_stat_rx_cksum_no_need++; 1687 } 1688 if (head == NULL) { 1689 head = tail = mp; 1690 } else { 1691 tail->b_next = mp; 1692 tail = mp; 1693 } 1694 1695 ASSERT(mp->b_next == NULL); 1696 1697 xnfp->xnf_stat_ipackets++; 1698 xnfp->xnf_stat_rbytes += len; 1699 } 1700 1701 xnfp->xnf_rx_ring.rsp_cons++; 1702 1703 xnfp->xnf_stat_hvcopy_packet_processed++; 1704 } 1705 1706 /* 7. */ 1707 /* 1708 * Has more data come in since we started? 1709 */ 1710 /* LINTED: constant in conditional context */ 1711 RING_FINAL_CHECK_FOR_RESPONSES(&xnfp->xnf_rx_ring, work_to_do); 1712 if (work_to_do) 1713 goto loop; 1714 1715 /* 1716 * Indicate to the backend that we have re-filled the receive 1717 * ring. 
1718 */ 1719 /* LINTED: constant in conditional context */ 1720 RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&xnfp->xnf_rx_ring, notify); 1721 if (notify) 1722 ec_notify_via_evtchn(xnfp->xnf_evtchn); 1723 1724 return (head); 1725 } 1726 1727 /* Process all queued received packets */ 1728 static mblk_t * 1729 xnf_process_recv(xnf_t *xnfp) 1730 { 1731 volatile netif_rx_response_t *rxpkt; 1732 mblk_t *mp, *head, *tail; 1733 struct xnf_buffer_desc *bdesc; 1734 extern mblk_t *desballoc(unsigned char *, size_t, uint_t, frtn_t *); 1735 boolean_t hwcsum = B_FALSE, notify, work_to_do; 1736 size_t len; 1737 pfn_t pfn; 1738 long cnt; 1739 1740 head = tail = NULL; 1741 loop: 1742 while (RING_HAS_UNCONSUMED_RESPONSES(&xnfp->xnf_rx_ring)) { 1743 1744 rxpkt = RING_GET_RESPONSE(&xnfp->xnf_rx_ring, 1745 xnfp->xnf_rx_ring.rsp_cons); 1746 1747 /* 1748 * Take buffer off of receive ring 1749 */ 1750 hwcsum = B_FALSE; 1751 bdesc = xnfp->xnf_rxpkt_bufptr[rxpkt->id]; 1752 xnfp->xnf_rxpkt_bufptr[rxpkt->id] = NULL; 1753 ASSERT(bdesc->id == rxpkt->id); 1754 mp = NULL; 1755 if (!xnfp->xnf_running) { 1756 xnfp->xnf_stat_drop++; 1757 /* 1758 * re-hang the buffer 1759 */ 1760 rx_buffer_hang(xnfp, bdesc); 1761 } else if (rxpkt->status <= 0) { 1762 xnfp->xnf_stat_errrx++; 1763 if (rxpkt->status == 0) 1764 xnfp->xnf_stat_runt++; 1765 if (rxpkt->status == NETIF_RSP_ERROR) 1766 xnfp->xnf_stat_mac_rcv_error++; 1767 if (rxpkt->status == NETIF_RSP_DROPPED) 1768 xnfp->xnf_stat_norxbuf++; 1769 /* 1770 * re-hang the buffer 1771 */ 1772 rx_buffer_hang(xnfp, bdesc); 1773 } else { 1774 grant_ref_t ref = bdesc->grant_ref; 1775 struct xnf_buffer_desc *new_bdesc; 1776 unsigned long off = rxpkt->offset; 1777 unsigned long mfn; 1778 1779 len = rxpkt->status; 1780 ASSERT(off + len <= PAGEOFFSET); 1781 if (ref == GRANT_INVALID_REF) { 1782 mp = NULL; 1783 new_bdesc = bdesc; 1784 cmn_err(CE_WARN, "Bad rx grant reference %d " 1785 "from dom %d", ref, 1786 xvdi_get_oeid(xnfp->xnf_devinfo)); 1787 goto luckless; 1788 } 1789 bdesc->grant_ref = GRANT_INVALID_REF; 1790 mfn = gnttab_end_foreign_transfer_ref(ref); 1791 ASSERT(mfn != MFN_INVALID); 1792 ASSERT(hat_getpfnum(kas.a_hat, bdesc->buf) == 1793 PFN_INVALID); 1794 1795 gnttab_release_grant_reference(&xnfp->xnf_gref_rx_head, 1796 ref); 1797 reassign_pfn(xnf_btop(bdesc->buf_phys), mfn); 1798 hat_devload(kas.a_hat, bdesc->buf, PAGESIZE, 1799 xnf_btop(bdesc->buf_phys), 1800 PROT_READ | PROT_WRITE, HAT_LOAD); 1801 balloon_drv_added(1); 1802 1803 if (rxpkt->flags & NETRXF_data_validated) 1804 hwcsum = B_TRUE; 1805 if (len <= xnf_rx_bcopy_thresh) { 1806 /* 1807 * For small buffers, just copy the data 1808 * and send the copy upstream. 1809 */ 1810 new_bdesc = NULL; 1811 } else { 1812 /* 1813 * We send a pointer to this data upstream; 1814 * we need a new buffer to replace this one. 1815 */ 1816 mutex_enter(&xnfp->xnf_rx_buf_mutex); 1817 new_bdesc = xnf_get_buffer(xnfp); 1818 if (new_bdesc != NULL) { 1819 xnfp->xnf_rx_bufs_outstanding++; 1820 } else { 1821 xnfp->xnf_stat_rx_no_ringbuf++; 1822 } 1823 mutex_exit(&xnfp->xnf_rx_buf_mutex); 1824 } 1825 1826 if (new_bdesc == NULL) { 1827 /* 1828 * Don't have a new ring buffer; bcopy the data 1829 * from the buffer, and preserve the 1830 * original buffer 1831 */ 1832 if ((mp = allocb(len, BPRI_MED)) == NULL) { 1833 /* 1834 * Could't get buffer to copy to, 1835 * drop this data, and re-hang 1836 * the buffer on the ring. 
1837 */ 1838 xnfp->xnf_stat_norxbuf++; 1839 } else { 1840 bcopy(bdesc->buf + off, mp->b_wptr, 1841 len); 1842 } 1843 /* 1844 * Give the buffer page back to xen 1845 */ 1846 pfn = xnf_btop(bdesc->buf_phys); 1847 cnt = balloon_free_pages(1, &mfn, bdesc->buf, 1848 &pfn); 1849 if (cnt != 1) { 1850 cmn_err(CE_WARN, "unable to give a " 1851 "page back to the hypervisor\n"); 1852 } 1853 new_bdesc = bdesc; 1854 } else { 1855 if ((mp = desballoc((unsigned char *)bdesc->buf, 1856 off + len, 0, (frtn_t *)bdesc)) == NULL) { 1857 /* 1858 * Couldn't get mblk to pass recv data 1859 * up with, free the old ring buffer 1860 */ 1861 xnfp->xnf_stat_norxbuf++; 1862 xnf_rcv_complete(bdesc); 1863 goto luckless; 1864 } 1865 (void) ddi_dma_sync(bdesc->dma_handle, 1866 0, 0, DDI_DMA_SYNC_FORCPU); 1867 1868 mp->b_wptr += off; 1869 mp->b_rptr += off; 1870 } 1871 luckless: 1872 if (mp) 1873 mp->b_wptr += len; 1874 /* re-hang old or hang new buffer */ 1875 rx_buffer_hang(xnfp, new_bdesc); 1876 } 1877 if (mp) { 1878 if (hwcsum) { 1879 /* 1880 * If the peer says that the data has 1881 * been validated then we declare that 1882 * the full checksum has been 1883 * verified. 1884 * 1885 * We don't look at the "checksum 1886 * blank" flag, and hence could have a 1887 * packet here that we are asserting 1888 * is good with a blank checksum. 1889 * 1890 * The hardware checksum offload 1891 * specification says that we must 1892 * provide the actual checksum as well 1893 * as an assertion that it is valid, 1894 * but the protocol stack doesn't 1895 * actually use it and some other 1896 * drivers don't bother, so we don't. 1897 * If it was necessary we could grovel 1898 * in the packet to find it. 1899 */ 1900 1901 (void) hcksum_assoc(mp, NULL, 1902 NULL, 0, 0, 0, 0, 1903 HCK_FULLCKSUM | 1904 HCK_FULLCKSUM_OK, 1905 0); 1906 xnfp->xnf_stat_rx_cksum_no_need++; 1907 } 1908 if (head == NULL) { 1909 head = tail = mp; 1910 } else { 1911 tail->b_next = mp; 1912 tail = mp; 1913 } 1914 1915 ASSERT(mp->b_next == NULL); 1916 1917 xnfp->xnf_stat_ipackets++; 1918 xnfp->xnf_stat_rbytes += len; 1919 } 1920 1921 xnfp->xnf_rx_ring.rsp_cons++; 1922 } 1923 1924 /* 1925 * Has more data come in since we started? 1926 */ 1927 /* LINTED: constant in conditional context */ 1928 RING_FINAL_CHECK_FOR_RESPONSES(&xnfp->xnf_rx_ring, work_to_do); 1929 if (work_to_do) 1930 goto loop; 1931 1932 /* 1933 * Indicate to the backend that we have re-filled the receive 1934 * ring. 1935 */ 1936 /* LINTED: constant in conditional context */ 1937 RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&xnfp->xnf_rx_ring, notify); 1938 if (notify) 1939 ec_notify_via_evtchn(xnfp->xnf_evtchn); 1940 1941 return (head); 1942 } 1943 1944 /* Called when the upper layers free a message we passed upstream */ 1945 static void 1946 xnf_rcv_complete(struct xnf_buffer_desc *bdesc) 1947 { 1948 xnf_t *xnfp = bdesc->xnfp; 1949 pfn_t pfn; 1950 long cnt; 1951 1952 /* One less outstanding receive buffer */ 1953 mutex_enter(&xnfp->xnf_rx_buf_mutex); 1954 --xnfp->xnf_rx_bufs_outstanding; 1955 /* 1956 * Return buffer to the free list, unless the free list is getting 1957 * too large. XXPV - this threshold may need tuning. 1958 */ 1959 if (xnfp->xnf_rx_descs_free < xnf_rx_bufs_lowat) { 1960 /* 1961 * Unmap the page, and hand the machine page back 1962 * to xen so it can be re-used as a backend net buffer. 
1963 */ 1964 pfn = xnf_btop(bdesc->buf_phys); 1965 cnt = balloon_free_pages(1, NULL, bdesc->buf, &pfn); 1966 if (cnt != 1) { 1967 cmn_err(CE_WARN, "unable to give a page back to the " 1968 "hypervisor\n"); 1969 } 1970 1971 bdesc->next = xnfp->xnf_free_list; 1972 xnfp->xnf_free_list = bdesc; 1973 xnfp->xnf_rx_descs_free++; 1974 mutex_exit(&xnfp->xnf_rx_buf_mutex); 1975 } else { 1976 /* 1977 * We can return everything here since we have a free buffer 1978 * that we have not given the backing page for back to xen. 1979 */ 1980 --xnfp->xnf_rx_buffer_count; 1981 mutex_exit(&xnfp->xnf_rx_buf_mutex); 1982 (void) ddi_dma_unbind_handle(bdesc->dma_handle); 1983 ddi_dma_mem_free(&bdesc->acc_handle); 1984 ddi_dma_free_handle(&bdesc->dma_handle); 1985 kmem_free(bdesc, sizeof (*bdesc)); 1986 } 1987 } 1988 1989 /* 1990 * xnf_alloc_dma_resources() -- initialize the drivers structures 1991 */ 1992 static int 1993 xnf_alloc_dma_resources(xnf_t *xnfp) 1994 { 1995 dev_info_t *devinfo = xnfp->xnf_devinfo; 1996 int i; 1997 size_t len; 1998 ddi_dma_cookie_t dma_cookie; 1999 uint_t ncookies; 2000 struct xnf_buffer_desc *bdesc; 2001 int rc; 2002 caddr_t rptr; 2003 2004 xnfp->xnf_n_rx = NET_RX_RING_SIZE; 2005 xnfp->xnf_max_rx_bufs = xnf_rx_bufs_hiwat; 2006 2007 xnfp->xnf_n_tx = NET_TX_RING_SIZE; 2008 2009 /* 2010 * The code below allocates all the DMA data structures that 2011 * need to be released when the driver is detached. 2012 * 2013 * First allocate handles for mapping (virtual address) pointers to 2014 * transmit data buffers to physical addresses 2015 */ 2016 for (i = 0; i < xnfp->xnf_n_tx; i++) { 2017 if ((rc = ddi_dma_alloc_handle(devinfo, 2018 &tx_buffer_dma_attr, DDI_DMA_SLEEP, 0, 2019 &xnfp->xnf_tx_pkt_info[i].dma_handle)) != DDI_SUCCESS) 2020 return (DDI_FAILURE); 2021 } 2022 2023 /* 2024 * Allocate page for the transmit descriptor ring. 2025 */ 2026 if (ddi_dma_alloc_handle(devinfo, &ringbuf_dma_attr, 2027 DDI_DMA_SLEEP, 0, &xnfp->xnf_tx_ring_dma_handle) != DDI_SUCCESS) 2028 goto alloc_error; 2029 2030 if (ddi_dma_mem_alloc(xnfp->xnf_tx_ring_dma_handle, 2031 PAGESIZE, &accattr, DDI_DMA_CONSISTENT, 2032 DDI_DMA_SLEEP, 0, &rptr, &len, 2033 &xnfp->xnf_tx_ring_dma_acchandle) != DDI_SUCCESS) { 2034 ddi_dma_free_handle(&xnfp->xnf_tx_ring_dma_handle); 2035 xnfp->xnf_tx_ring_dma_handle = NULL; 2036 goto alloc_error; 2037 } 2038 2039 if ((rc = ddi_dma_addr_bind_handle(xnfp->xnf_tx_ring_dma_handle, NULL, 2040 rptr, PAGESIZE, DDI_DMA_RDWR | DDI_DMA_CONSISTENT, 2041 DDI_DMA_SLEEP, 0, &dma_cookie, &ncookies)) != DDI_DMA_MAPPED) { 2042 ddi_dma_mem_free(&xnfp->xnf_tx_ring_dma_acchandle); 2043 ddi_dma_free_handle(&xnfp->xnf_tx_ring_dma_handle); 2044 xnfp->xnf_tx_ring_dma_handle = NULL; 2045 xnfp->xnf_tx_ring_dma_acchandle = NULL; 2046 if (rc == DDI_DMA_NORESOURCES) 2047 goto alloc_error; 2048 else 2049 goto error; 2050 } 2051 2052 ASSERT(ncookies == 1); 2053 bzero(rptr, PAGESIZE); 2054 /* LINTED: constant in conditional context */ 2055 SHARED_RING_INIT((netif_tx_sring_t *)rptr); 2056 /* LINTED: constant in conditional context */ 2057 FRONT_RING_INIT(&xnfp->xnf_tx_ring, (netif_tx_sring_t *)rptr, PAGESIZE); 2058 xnfp->xnf_tx_ring_phys_addr = dma_cookie.dmac_laddress; 2059 2060 /* 2061 * Allocate page for the receive descriptor ring. 
2062 */ 2063 if (ddi_dma_alloc_handle(devinfo, &ringbuf_dma_attr, 2064 DDI_DMA_SLEEP, 0, &xnfp->xnf_rx_ring_dma_handle) != DDI_SUCCESS) 2065 goto alloc_error; 2066 2067 if (ddi_dma_mem_alloc(xnfp->xnf_rx_ring_dma_handle, 2068 PAGESIZE, &accattr, DDI_DMA_CONSISTENT, 2069 DDI_DMA_SLEEP, 0, &rptr, &len, 2070 &xnfp->xnf_rx_ring_dma_acchandle) != DDI_SUCCESS) { 2071 ddi_dma_free_handle(&xnfp->xnf_rx_ring_dma_handle); 2072 xnfp->xnf_rx_ring_dma_handle = NULL; 2073 goto alloc_error; 2074 } 2075 2076 if ((rc = ddi_dma_addr_bind_handle(xnfp->xnf_rx_ring_dma_handle, NULL, 2077 rptr, PAGESIZE, DDI_DMA_RDWR | DDI_DMA_CONSISTENT, 2078 DDI_DMA_SLEEP, 0, &dma_cookie, &ncookies)) != DDI_DMA_MAPPED) { 2079 ddi_dma_mem_free(&xnfp->xnf_rx_ring_dma_acchandle); 2080 ddi_dma_free_handle(&xnfp->xnf_rx_ring_dma_handle); 2081 xnfp->xnf_rx_ring_dma_handle = NULL; 2082 xnfp->xnf_rx_ring_dma_acchandle = NULL; 2083 if (rc == DDI_DMA_NORESOURCES) 2084 goto alloc_error; 2085 else 2086 goto error; 2087 } 2088 2089 ASSERT(ncookies == 1); 2090 bzero(rptr, PAGESIZE); 2091 /* LINTED: constant in conditional context */ 2092 SHARED_RING_INIT((netif_rx_sring_t *)rptr); 2093 /* LINTED: constant in conditional context */ 2094 FRONT_RING_INIT(&xnfp->xnf_rx_ring, (netif_rx_sring_t *)rptr, PAGESIZE); 2095 xnfp->xnf_rx_ring_phys_addr = dma_cookie.dmac_laddress; 2096 2097 /* 2098 * Preallocate receive buffers for each receive descriptor. 2099 */ 2100 2101 /* Set up the "free list" of receive buffer descriptors */ 2102 for (i = 0; i < xnfp->xnf_n_rx; i++) { 2103 if ((bdesc = xnf_alloc_buffer(xnfp)) == NULL) 2104 goto alloc_error; 2105 bdesc->next = xnfp->xnf_free_list; 2106 xnfp->xnf_free_list = bdesc; 2107 } 2108 2109 return (DDI_SUCCESS); 2110 2111 alloc_error: 2112 cmn_err(CE_WARN, "xnf%d: could not allocate enough DMA memory", 2113 ddi_get_instance(xnfp->xnf_devinfo)); 2114 error: 2115 xnf_release_dma_resources(xnfp); 2116 return (DDI_FAILURE); 2117 } 2118 2119 /* 2120 * Release all DMA resources in the opposite order from acquisition 2121 * Should not be called until all outstanding esballoc buffers 2122 * have been returned. 
2123 */ 2124 static void 2125 xnf_release_dma_resources(xnf_t *xnfp) 2126 { 2127 int i; 2128 2129 /* 2130 * Free receive buffers which are currently associated with 2131 * descriptors 2132 */ 2133 for (i = 0; i < xnfp->xnf_n_rx; i++) { 2134 struct xnf_buffer_desc *bp; 2135 2136 if ((bp = xnfp->xnf_rxpkt_bufptr[i]) == NULL) 2137 continue; 2138 xnf_free_buffer(bp); 2139 xnfp->xnf_rxpkt_bufptr[i] = NULL; 2140 } 2141 2142 /* Free the receive ring buffer */ 2143 if (xnfp->xnf_rx_ring_dma_acchandle != NULL) { 2144 (void) ddi_dma_unbind_handle(xnfp->xnf_rx_ring_dma_handle); 2145 ddi_dma_mem_free(&xnfp->xnf_rx_ring_dma_acchandle); 2146 ddi_dma_free_handle(&xnfp->xnf_rx_ring_dma_handle); 2147 xnfp->xnf_rx_ring_dma_acchandle = NULL; 2148 } 2149 /* Free the transmit ring buffer */ 2150 if (xnfp->xnf_tx_ring_dma_acchandle != NULL) { 2151 (void) ddi_dma_unbind_handle(xnfp->xnf_tx_ring_dma_handle); 2152 ddi_dma_mem_free(&xnfp->xnf_tx_ring_dma_acchandle); 2153 ddi_dma_free_handle(&xnfp->xnf_tx_ring_dma_handle); 2154 xnfp->xnf_tx_ring_dma_acchandle = NULL; 2155 } 2156 2157 /* 2158 * Free handles for mapping (virtual address) pointers to 2159 * transmit data buffers to physical addresses 2160 */ 2161 for (i = 0; i < xnfp->xnf_n_tx; i++) { 2162 if (xnfp->xnf_tx_pkt_info[i].dma_handle != NULL) { 2163 ddi_dma_free_handle( 2164 &xnfp->xnf_tx_pkt_info[i].dma_handle); 2165 } 2166 } 2167 2168 } 2169 2170 static void 2171 xnf_release_mblks(xnf_t *xnfp) 2172 { 2173 int i; 2174 2175 for (i = 0; i < xnfp->xnf_n_tx; i++) { 2176 if (xnfp->xnf_tx_pkt_info[i].mp == NULL) 2177 continue; 2178 freemsg(xnfp->xnf_tx_pkt_info[i].mp); 2179 xnfp->xnf_tx_pkt_info[i].mp = NULL; 2180 (void) ddi_dma_unbind_handle( 2181 xnfp->xnf_tx_pkt_info[i].dma_handle); 2182 } 2183 } 2184 2185 /* 2186 * Remove a xmit buffer descriptor from the head of the free list and return 2187 * a pointer to it. If no buffers on list, attempt to allocate a new one. 2188 * Called with the tx_buf_mutex held. 2189 */ 2190 static struct xnf_buffer_desc * 2191 xnf_get_tx_buffer(xnf_t *xnfp) 2192 { 2193 struct xnf_buffer_desc *bdesc; 2194 2195 bdesc = xnfp->xnf_tx_free_list; 2196 if (bdesc != NULL) { 2197 xnfp->xnf_tx_free_list = bdesc->next; 2198 } else { 2199 bdesc = xnf_alloc_tx_buffer(xnfp); 2200 } 2201 return (bdesc); 2202 } 2203 2204 /* 2205 * Remove a buffer descriptor from the head of the free list and return 2206 * a pointer to it. If no buffers on list, attempt to allocate a new one. 2207 * Called with the rx_buf_mutex held. 2208 */ 2209 static struct xnf_buffer_desc * 2210 xnf_get_buffer(xnf_t *xnfp) 2211 { 2212 struct xnf_buffer_desc *bdesc; 2213 2214 bdesc = xnfp->xnf_free_list; 2215 if (bdesc != NULL) { 2216 xnfp->xnf_free_list = bdesc->next; 2217 xnfp->xnf_rx_descs_free--; 2218 } else { 2219 bdesc = xnf_alloc_buffer(xnfp); 2220 } 2221 return (bdesc); 2222 } 2223 2224 /* 2225 * Free a xmit buffer back to the xmit free list 2226 */ 2227 static void 2228 xnf_free_tx_buffer(struct xnf_buffer_desc *bp) 2229 { 2230 xnf_t *xnfp = bp->xnfp; 2231 2232 mutex_enter(&xnfp->xnf_tx_buf_mutex); 2233 bp->next = xnfp->xnf_tx_free_list; 2234 xnfp->xnf_tx_free_list = bp; 2235 mutex_exit(&xnfp->xnf_tx_buf_mutex); 2236 } 2237 2238 /* 2239 * Put a buffer descriptor onto the head of the free list. 2240 * for page-flip: 2241 * We can't really free these buffers back to the kernel 2242 * since we have given away their backing page to be used 2243 * by the back end net driver. 
2244 * for hvcopy: 2245 * release all the memory 2246 */ 2247 static void 2248 xnf_free_buffer(struct xnf_buffer_desc *bdesc) 2249 { 2250 xnf_t *xnfp = bdesc->xnfp; 2251 2252 mutex_enter(&xnfp->xnf_rx_buf_mutex); 2253 if (xnfp->xnf_rx_hvcopy) { 2254 if (ddi_dma_unbind_handle(bdesc->dma_handle) != DDI_SUCCESS) 2255 goto out; 2256 ddi_dma_mem_free(&bdesc->acc_handle); 2257 ddi_dma_free_handle(&bdesc->dma_handle); 2258 kmem_free(bdesc, sizeof (*bdesc)); 2259 xnfp->xnf_rx_buffer_count--; 2260 } else { 2261 bdesc->next = xnfp->xnf_free_list; 2262 xnfp->xnf_free_list = bdesc; 2263 xnfp->xnf_rx_descs_free++; 2264 } 2265 out: 2266 mutex_exit(&xnfp->xnf_rx_buf_mutex); 2267 } 2268 2269 /* 2270 * Allocate a DMA-able xmit buffer, including a structure to 2271 * keep track of the buffer. Called with tx_buf_mutex held. 2272 */ 2273 static struct xnf_buffer_desc * 2274 xnf_alloc_tx_buffer(xnf_t *xnfp) 2275 { 2276 struct xnf_buffer_desc *bdesc; 2277 size_t len; 2278 2279 if ((bdesc = kmem_zalloc(sizeof (*bdesc), KM_NOSLEEP)) == NULL) 2280 return (NULL); 2281 2282 /* allocate a DMA access handle for transmit buffer */ 2283 if (ddi_dma_alloc_handle(xnfp->xnf_devinfo, &tx_buffer_dma_attr, 2284 0, 0, &bdesc->dma_handle) != DDI_SUCCESS) 2285 goto failure; 2286 2287 /* Allocate DMA-able memory for transmit buffer */ 2288 if (ddi_dma_mem_alloc(bdesc->dma_handle, 2289 PAGESIZE, &data_accattr, DDI_DMA_STREAMING, 0, 0, 2290 &bdesc->buf, &len, &bdesc->acc_handle) != DDI_SUCCESS) 2291 goto failure_1; 2292 2293 bdesc->xnfp = xnfp; 2294 xnfp->xnf_tx_buffer_count++; 2295 2296 return (bdesc); 2297 2298 failure_1: 2299 ddi_dma_free_handle(&bdesc->dma_handle); 2300 2301 failure: 2302 kmem_free(bdesc, sizeof (*bdesc)); 2303 return (NULL); 2304 } 2305 2306 /* 2307 * Allocate a DMA-able receive buffer, including a structure to 2308 * keep track of the buffer. Called with rx_buf_mutex held.
2309 */ 2310 static struct xnf_buffer_desc * 2311 xnf_alloc_buffer(xnf_t *xnfp) 2312 { 2313 struct xnf_buffer_desc *bdesc; 2314 size_t len; 2315 uint_t ncookies; 2316 ddi_dma_cookie_t dma_cookie; 2317 long cnt; 2318 pfn_t pfn; 2319 2320 if (xnfp->xnf_rx_buffer_count >= xnfp->xnf_max_rx_bufs) 2321 return (NULL); 2322 2323 if ((bdesc = kmem_zalloc(sizeof (*bdesc), KM_NOSLEEP)) == NULL) 2324 return (NULL); 2325 2326 /* allocate a DMA access handle for receive buffer */ 2327 if (ddi_dma_alloc_handle(xnfp->xnf_devinfo, &rx_buffer_dma_attr, 2328 0, 0, &bdesc->dma_handle) != DDI_SUCCESS) 2329 goto failure; 2330 2331 /* Allocate DMA-able memory for receive buffer */ 2332 if (ddi_dma_mem_alloc(bdesc->dma_handle, 2333 PAGESIZE, &data_accattr, DDI_DMA_STREAMING, 0, 0, 2334 &bdesc->buf, &len, &bdesc->acc_handle) != DDI_SUCCESS) 2335 goto failure_1; 2336 2337 /* bind to virtual address of buffer to get physical address */ 2338 if (ddi_dma_addr_bind_handle(bdesc->dma_handle, NULL, 2339 bdesc->buf, PAGESIZE, DDI_DMA_READ | DDI_DMA_STREAMING, 2340 DDI_DMA_SLEEP, 0, &dma_cookie, &ncookies) != DDI_DMA_MAPPED) 2341 goto failure_2; 2342 2343 bdesc->buf_phys = dma_cookie.dmac_laddress; 2344 bdesc->xnfp = xnfp; 2345 if (xnfp->xnf_rx_hvcopy) { 2346 bdesc->free_rtn.free_func = xnf_copy_rcv_complete; 2347 } else { 2348 bdesc->free_rtn.free_func = xnf_rcv_complete; 2349 } 2350 bdesc->free_rtn.free_arg = (char *)bdesc; 2351 bdesc->grant_ref = GRANT_INVALID_REF; 2352 ASSERT(ncookies == 1); 2353 2354 xnfp->xnf_rx_buffer_count++; 2355 2356 if (!xnfp->xnf_rx_hvcopy) { 2357 /* 2358 * Unmap the page, and hand the machine page back 2359 * to xen so it can be used as a backend net buffer. 2360 */ 2361 pfn = xnf_btop(bdesc->buf_phys); 2362 cnt = balloon_free_pages(1, NULL, bdesc->buf, &pfn); 2363 if (cnt != 1) { 2364 cmn_err(CE_WARN, "unable to give a page back to the " 2365 "hypervisor\n"); 2366 } 2367 } 2368 2369 return (bdesc); 2370 2371 failure_2: 2372 ddi_dma_mem_free(&bdesc->acc_handle); 2373 2374 failure_1: 2375 ddi_dma_free_handle(&bdesc->dma_handle); 2376 2377 failure: 2378 kmem_free(bdesc, sizeof (*bdesc)); 2379 return (NULL); 2380 } 2381 2382 /* 2383 * Statistics. 2384 */ 2385 static char *xnf_aux_statistics[] = { 2386 "tx_cksum_deferred", 2387 "rx_cksum_no_need", 2388 "interrupts", 2389 "unclaimed_interrupts", 2390 "tx_pullup", 2391 "tx_pagebndry", 2392 "tx_attempt", 2393 "rx_no_ringbuf", 2394 "hvcopy_packet_processed", 2395 }; 2396 2397 static int 2398 xnf_kstat_aux_update(kstat_t *ksp, int flag) 2399 { 2400 xnf_t *xnfp; 2401 kstat_named_t *knp; 2402 2403 if (flag != KSTAT_READ) 2404 return (EACCES); 2405 2406 xnfp = ksp->ks_private; 2407 knp = ksp->ks_data; 2408 2409 /* 2410 * Assignment order must match that of the names in 2411 * xnf_aux_statistics. 
2412 */ 2413 (knp++)->value.ui64 = xnfp->xnf_stat_tx_cksum_deferred; 2414 (knp++)->value.ui64 = xnfp->xnf_stat_rx_cksum_no_need; 2415 2416 (knp++)->value.ui64 = xnfp->xnf_stat_interrupts; 2417 (knp++)->value.ui64 = xnfp->xnf_stat_unclaimed_interrupts; 2418 (knp++)->value.ui64 = xnfp->xnf_stat_tx_pullup; 2419 (knp++)->value.ui64 = xnfp->xnf_stat_tx_pagebndry; 2420 (knp++)->value.ui64 = xnfp->xnf_stat_tx_attempt; 2421 (knp++)->value.ui64 = xnfp->xnf_stat_rx_no_ringbuf; 2422 2423 (knp++)->value.ui64 = xnfp->xnf_stat_hvcopy_packet_processed; 2424 2425 return (0); 2426 } 2427 2428 static boolean_t 2429 xnf_kstat_init(xnf_t *xnfp) 2430 { 2431 int nstat = sizeof (xnf_aux_statistics) / 2432 sizeof (xnf_aux_statistics[0]); 2433 char **cp = xnf_aux_statistics; 2434 kstat_named_t *knp; 2435 2436 /* 2437 * Create and initialise kstats. 2438 */ 2439 if ((xnfp->xnf_kstat_aux = kstat_create("xnf", 2440 ddi_get_instance(xnfp->xnf_devinfo), 2441 "aux_statistics", "net", KSTAT_TYPE_NAMED, 2442 nstat, 0)) == NULL) 2443 return (B_FALSE); 2444 2445 xnfp->xnf_kstat_aux->ks_private = xnfp; 2446 xnfp->xnf_kstat_aux->ks_update = xnf_kstat_aux_update; 2447 2448 knp = xnfp->xnf_kstat_aux->ks_data; 2449 while (nstat > 0) { 2450 kstat_named_init(knp, *cp, KSTAT_DATA_UINT64); 2451 2452 knp++; 2453 cp++; 2454 nstat--; 2455 } 2456 2457 kstat_install(xnfp->xnf_kstat_aux); 2458 2459 return (B_TRUE); 2460 } 2461 2462 static int 2463 xnf_stat(void *arg, uint_t stat, uint64_t *val) 2464 { 2465 xnf_t *xnfp = arg; 2466 2467 mutex_enter(&xnfp->xnf_intrlock); 2468 mutex_enter(&xnfp->xnf_txlock); 2469 2470 #define mac_stat(q, r) \ 2471 case (MAC_STAT_##q): \ 2472 *val = xnfp->xnf_stat_##r; \ 2473 break 2474 2475 #define ether_stat(q, r) \ 2476 case (ETHER_STAT_##q): \ 2477 *val = xnfp->xnf_stat_##r; \ 2478 break 2479 2480 switch (stat) { 2481 2482 mac_stat(IPACKETS, ipackets); 2483 mac_stat(OPACKETS, opackets); 2484 mac_stat(RBYTES, rbytes); 2485 mac_stat(OBYTES, obytes); 2486 mac_stat(NORCVBUF, norxbuf); 2487 mac_stat(IERRORS, errrx); 2488 mac_stat(NOXMTBUF, tx_defer); 2489 2490 ether_stat(MACRCV_ERRORS, mac_rcv_error); 2491 ether_stat(TOOSHORT_ERRORS, runt); 2492 2493 /* always claim to be in full duplex mode */ 2494 case ETHER_STAT_LINK_DUPLEX: 2495 *val = LINK_DUPLEX_FULL; 2496 break; 2497 2498 /* always claim to be at 1Gb/s link speed */ 2499 case MAC_STAT_IFSPEED: 2500 *val = 1000000000ull; 2501 break; 2502 2503 default: 2504 mutex_exit(&xnfp->xnf_txlock); 2505 mutex_exit(&xnfp->xnf_intrlock); 2506 2507 return (ENOTSUP); 2508 } 2509 2510 #undef mac_stat 2511 #undef ether_stat 2512 2513 mutex_exit(&xnfp->xnf_txlock); 2514 mutex_exit(&xnfp->xnf_intrlock); 2515 2516 return (0); 2517 } 2518 2519 /*ARGSUSED*/ 2520 static void 2521 xnf_blank(void *arg, time_t ticks, uint_t count) 2522 { 2523 /* 2524 * XXPV dme: blanking is not currently implemented. 2525 * 2526 * It's not obvious how to use the 'ticks' argument here. 2527 * 2528 * 'Count' might be used as an indicator of how to set 2529 * rsp_event when posting receive buffers to the rx_ring. It 2530 * would replace the code at the tail of xnf_process_recv() 2531 * that simply indicates that the next completed packet should 2532 * cause an interrupt. 
2533 */ 2534 } 2535 2536 static void 2537 xnf_resources(void *arg) 2538 { 2539 xnf_t *xnfp = arg; 2540 mac_rx_fifo_t mrf; 2541 2542 mrf.mrf_type = MAC_RX_FIFO; 2543 mrf.mrf_blank = xnf_blank; 2544 mrf.mrf_arg = (void *)xnfp; 2545 mrf.mrf_normal_blank_time = 128; /* XXPV dme: see xnf_blank() */ 2546 mrf.mrf_normal_pkt_count = 8; /* XXPV dme: see xnf_blank() */ 2547 2548 xnfp->xnf_rx_handle = mac_resource_add(xnfp->xnf_mh, 2549 (mac_resource_t *)&mrf); 2550 } 2551 2552 /*ARGSUSED*/ 2553 static void 2554 xnf_ioctl(void *arg, queue_t *q, mblk_t *mp) 2555 { 2556 miocnak(q, mp, 0, EINVAL); 2557 } 2558 2559 static boolean_t 2560 xnf_getcapab(void *arg, mac_capab_t cap, void *cap_data) 2561 { 2562 xnf_t *xnfp = arg; 2563 2564 switch (cap) { 2565 case MAC_CAPAB_HCKSUM: { 2566 uint32_t *capab = cap_data; 2567 2568 /* 2569 * Whilst the flag used to communicate with the IO 2570 * domain is called "NETTXF_csum_blank", the checksum 2571 * in the packet must contain the pseudo-header 2572 * checksum and not zero. 2573 * 2574 * To help out the IO domain, we might use 2575 * HCKSUM_INET_PARTIAL. Unfortunately our stack will 2576 * then use checksum offload for IPv6 packets, which 2577 * the IO domain can't handle. 2578 * 2579 * As a result, we declare ourselves capable of 2580 * HCKSUM_INET_FULL_V4. This means that we receive 2581 * IPv4 packets from the stack with a blank checksum 2582 * field and must insert the pseudo-header checksum 2583 * before passing the packet to the IO domain. (An 2584 * illustrative sketch of this computation appears at the end of this file.) */ 2585 if (xnfp->xnf_cksum_offload) 2586 *capab = HCKSUM_INET_FULL_V4; 2587 else 2588 *capab = 0; 2589 break; 2590 } 2591 2592 case MAC_CAPAB_POLL: 2593 /* Just return B_TRUE. */ 2594 break; 2595 2596 default: 2597 return (B_FALSE); 2598 } 2599 2600 return (B_TRUE); 2601 } 2602 2603 /*ARGSUSED*/ 2604 static void 2605 oe_state_change(dev_info_t *dip, ddi_eventcookie_t id, 2606 void *arg, void *impl_data) 2607 { 2608 xnf_t *xnfp = ddi_get_driver_private(dip); 2609 XenbusState new_state = *(XenbusState *)impl_data; 2610 2611 ASSERT(xnfp != NULL); 2612 2613 switch (new_state) { 2614 case XenbusStateConnected: 2615 mutex_enter(&xnfp->xnf_intrlock); 2616 mutex_enter(&xnfp->xnf_txlock); 2617 2618 xnfp->xnf_connected = B_TRUE; 2619 /* 2620 * Wake up threads that want to send data to the backend 2621 * but were blocked because the backend was not ready. 2622 */ 2623 cv_broadcast(&xnfp->xnf_cv); 2624 2625 mutex_exit(&xnfp->xnf_txlock); 2626 mutex_exit(&xnfp->xnf_intrlock); 2627 2628 /* 2629 * Kick the backend in case it missed any tx requests 2630 * in the TX ring buffer. 2631 */ 2632 ec_notify_via_evtchn(xnfp->xnf_evtchn); 2633 2634 /* 2635 * Rx data may already be queued in the RX ring, sent by 2636 * the backend after it connected but before we saw its 2637 * state change here, so call our interrupt handler to 2638 * process it, if any. 2639 */ 2640 (void) xnf_intr((caddr_t)xnfp); 2641 2642 /* mark the link as up now that we are connected */ 2643 mac_link_update(xnfp->xnf_mh, LINK_STATE_UP); 2644 2645 break; 2646 2647 default: 2648 break; 2649 } 2650 } 2651 2652 /* 2653 * Check whether the backend is able and willing to talk 2654 * to us via hypervisor copy, as opposed to page flip. 2655 */ 2656 static boolean_t 2657 xnf_hvcopy_peer_status(dev_info_t *devinfo) 2658 { 2659 int be_rx_copy; 2660 int err; 2661 2662 err = xenbus_scanf(XBT_NULL, xvdi_get_oename(devinfo), 2663 "feature-rx-copy", "%d", &be_rx_copy); 2664 /* 2665 * If we fail to read the store we assume that the key is 2666 * absent, implying an older domain at the far end.
Older 2667 * domains cannot do HV copy (we assume). 2668 */ 2669 if (err != 0) 2670 be_rx_copy = 0; 2671 2672 return (be_rx_copy ? B_TRUE : B_FALSE); 2673 } 2674
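
/*
 * The MAC_CAPAB_HCKSUM comment in xnf_getcapab() above notes that,
 * because we advertise HCKSUM_INET_FULL_V4, outbound IPv4 packets
 * arrive from the stack with a blank TCP/UDP checksum field and the
 * driver must seed that field with the pseudo-header checksum before
 * handing the packet to the IO domain (NETTXF_csum_blank expects the
 * pseudo-header sum, not zero).  The function below is a minimal,
 * illustrative sketch of that standard computation; it is not part of
 * the driver, its name is made up for this example, and it assumes
 * all inputs are supplied in host byte order.
 */
static uint16_t
xnf_pseudo_cksum_sketch(uint32_t src, uint32_t dst, uint8_t proto,
    uint16_t ulp_len)
{
	uint32_t sum;

	/*
	 * Sum the pseudo-header as 16-bit words: source address,
	 * destination address, zero-padded protocol and the TCP/UDP
	 * (header plus payload) length.
	 */
	sum = (src >> 16) + (src & 0xffff);
	sum += (dst >> 16) + (dst & 0xffff);
	sum += proto;
	sum += ulp_len;

	/* Fold any carries back into the low 16 bits. */
	while ((sum >> 16) != 0)
		sum = (sum & 0xffff) + (sum >> 16);

	/*
	 * The seed is not complemented; the peer completes the one's
	 * complement sum over the ULP header and payload.
	 */
	return ((uint16_t)sum);
}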