1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 /* 30 * 31 * Copyright (c) 2004 Christian Limpach. 32 * All rights reserved. 33 * 34 * Redistribution and use in source and binary forms, with or without 35 * modification, are permitted provided that the following conditions 36 * are met: 37 * 1. Redistributions of source code must retain the above copyright 38 * notice, this list of conditions and the following disclaimer. 39 * 2. Redistributions in binary form must reproduce the above copyright 40 * notice, this list of conditions and the following disclaimer in the 41 * documentation and/or other materials provided with the distribution. 42 * 3. This section intentionally left blank. 43 * 4. The name of the author may not be used to endorse or promote products 44 * derived from this software without specific prior written permission. 45 * 46 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 47 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 48 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 49 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 50 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 51 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 52 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 53 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 54 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 55 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 56 */ 57 /* 58 * Section 3 of the above license was updated in response to bug 6379571. 
59 */ 60 61 /* 62 * xnf.c - Nemo-based network driver for domU 63 */ 64 65 #include <sys/types.h> 66 #include <sys/errno.h> 67 #include <sys/param.h> 68 #include <sys/sysmacros.h> 69 #include <sys/systm.h> 70 #include <sys/stream.h> 71 #include <sys/strsubr.h> 72 #include <sys/conf.h> 73 #include <sys/ddi.h> 74 #include <sys/devops.h> 75 #include <sys/sunddi.h> 76 #include <sys/sunndi.h> 77 #include <sys/dlpi.h> 78 #include <sys/ethernet.h> 79 #include <sys/strsun.h> 80 #include <sys/pattr.h> 81 #include <inet/ip.h> 82 #include <sys/modctl.h> 83 #include <sys/mac.h> 84 #include <sys/mac_ether.h> 85 #include <sys/bootinfo.h> 86 #include <sys/mach_mmu.h> 87 #ifdef XPV_HVM_DRIVER 88 #include <sys/xpv_support.h> 89 #include <sys/hypervisor.h> 90 #else 91 #include <sys/hypervisor.h> 92 #include <sys/evtchn_impl.h> 93 #include <sys/balloon_impl.h> 94 #endif 95 #include <xen/public/io/netif.h> 96 #include <sys/gnttab.h> 97 #include <xen/sys/xendev.h> 98 #include <sys/sdt.h> 99 100 #include <io/xnf.h> 101 102 103 /* 104 * Declarations and Module Linkage 105 */ 106 107 #define IDENT "Virtual Ethernet driver" 108 109 #if defined(DEBUG) || defined(__lint) 110 #define XNF_DEBUG 111 int xnfdebug = 0; 112 #endif 113 114 /* 115 * On a 32 bit PAE system physical and machine addresses are larger 116 * than 32 bits. ddi_btop() on such systems take an unsigned long 117 * argument, and so addresses above 4G are truncated before ddi_btop() 118 * gets to see them. To avoid this, code the shift operation here. 119 */ 120 #define xnf_btop(addr) ((addr) >> PAGESHIFT) 121 122 boolean_t xnf_cksum_offload = B_TRUE; 123 124 /* Default value for hypervisor-based copy operations */ 125 boolean_t xnf_rx_hvcopy = B_TRUE; 126 127 /* 128 * Should pages used for transmit be readonly for the peer? 129 */ 130 boolean_t xnf_tx_pages_readonly = B_FALSE; 131 /* 132 * Packets under this size are bcopied instead of using desballoc. 133 * Choose a value > XNF_FRAMESIZE (1514) to force the receive path to 134 * always copy. 
135 */ 136 unsigned int xnf_rx_bcopy_thresh = 64; 137 138 unsigned int xnf_max_tx_frags = 1; 139 140 /* Required system entry points */ 141 static int xnf_attach(dev_info_t *, ddi_attach_cmd_t); 142 static int xnf_detach(dev_info_t *, ddi_detach_cmd_t); 143 144 /* Required driver entry points for Nemo */ 145 static int xnf_start(void *); 146 static void xnf_stop(void *); 147 static int xnf_set_mac_addr(void *, const uint8_t *); 148 static int xnf_set_multicast(void *, boolean_t, const uint8_t *); 149 static int xnf_set_promiscuous(void *, boolean_t); 150 static mblk_t *xnf_send(void *, mblk_t *); 151 static uint_t xnf_intr(caddr_t); 152 static int xnf_stat(void *, uint_t, uint64_t *); 153 static void xnf_blank(void *, time_t, uint_t); 154 static void xnf_resources(void *); 155 static void xnf_ioctl(void *, queue_t *, mblk_t *); 156 static boolean_t xnf_getcapab(void *, mac_capab_t, void *); 157 158 /* Driver private functions */ 159 static int xnf_alloc_dma_resources(xnf_t *); 160 static void xnf_release_dma_resources(xnf_t *); 161 static mblk_t *xnf_process_recv(xnf_t *); 162 static void xnf_rcv_complete(struct xnf_buffer_desc *); 163 static void xnf_release_mblks(xnf_t *); 164 static struct xnf_buffer_desc *xnf_alloc_tx_buffer(xnf_t *); 165 static struct xnf_buffer_desc *xnf_alloc_buffer(xnf_t *); 166 static struct xnf_buffer_desc *xnf_get_tx_buffer(xnf_t *); 167 static struct xnf_buffer_desc *xnf_get_buffer(xnf_t *); 168 static void xnf_free_buffer(struct xnf_buffer_desc *); 169 static void xnf_free_tx_buffer(struct xnf_buffer_desc *); 170 void xnf_send_driver_status(int, int); 171 static void rx_buffer_hang(xnf_t *, struct xnf_buffer_desc *); 172 static int xnf_clean_tx_ring(xnf_t *); 173 static void oe_state_change(dev_info_t *, ddi_eventcookie_t, 174 void *, void *); 175 static mblk_t *xnf_process_hvcopy_recv(xnf_t *xnfp); 176 static boolean_t xnf_hvcopy_peer_status(dev_info_t *devinfo); 177 static boolean_t xnf_kstat_init(xnf_t *xnfp); 178 179 /* 180 * XXPV dme: remove MC_IOCTL? 
181 */ 182 static mac_callbacks_t xnf_callbacks = { 183 MC_RESOURCES | MC_IOCTL | MC_GETCAPAB, 184 xnf_stat, 185 xnf_start, 186 xnf_stop, 187 xnf_set_promiscuous, 188 xnf_set_multicast, 189 xnf_set_mac_addr, 190 xnf_send, 191 xnf_resources, 192 xnf_ioctl, 193 xnf_getcapab 194 }; 195 196 #define GRANT_INVALID_REF 0 197 const int xnf_rx_bufs_lowat = 4 * NET_RX_RING_SIZE; 198 const int xnf_rx_bufs_hiwat = 8 * NET_RX_RING_SIZE; /* default max */ 199 200 /* DMA attributes for network ring buffer */ 201 static ddi_dma_attr_t ringbuf_dma_attr = { 202 DMA_ATTR_V0, /* version of this structure */ 203 0, /* lowest usable address */ 204 0xffffffffffffffffULL, /* highest usable address */ 205 0x7fffffff, /* maximum DMAable byte count */ 206 MMU_PAGESIZE, /* alignment in bytes */ 207 0x7ff, /* bitmap of burst sizes */ 208 1, /* minimum transfer */ 209 0xffffffffU, /* maximum transfer */ 210 0xffffffffffffffffULL, /* maximum segment length */ 211 1, /* maximum number of segments */ 212 1, /* granularity */ 213 0, /* flags (reserved) */ 214 }; 215 216 /* DMA attributes for transmit data */ 217 static ddi_dma_attr_t tx_buffer_dma_attr = { 218 DMA_ATTR_V0, /* version of this structure */ 219 0, /* lowest usable address */ 220 0xffffffffffffffffULL, /* highest usable address */ 221 0x7fffffff, /* maximum DMAable byte count */ 222 MMU_PAGESIZE, /* alignment in bytes */ 223 0x7ff, /* bitmap of burst sizes */ 224 1, /* minimum transfer */ 225 0xffffffffU, /* maximum transfer */ 226 0xffffffffffffffffULL, /* maximum segment length */ 227 1, /* maximum number of segments */ 228 1, /* granularity */ 229 0, /* flags (reserved) */ 230 }; 231 232 /* DMA attributes for a receive buffer */ 233 static ddi_dma_attr_t rx_buffer_dma_attr = { 234 DMA_ATTR_V0, /* version of this structure */ 235 0, /* lowest usable address */ 236 0xffffffffffffffffULL, /* highest usable address */ 237 0x7fffffff, /* maximum DMAable byte count */ 238 MMU_PAGESIZE, /* alignment in bytes */ 239 0x7ff, /* bitmap of burst sizes */ 240 1, /* minimum transfer */ 241 0xffffffffU, /* maximum transfer */ 242 0xffffffffffffffffULL, /* maximum segment length */ 243 1, /* maximum number of segments */ 244 1, /* granularity */ 245 0, /* flags (reserved) */ 246 }; 247 248 /* DMA access attributes for registers and descriptors */ 249 static ddi_device_acc_attr_t accattr = { 250 DDI_DEVICE_ATTR_V0, 251 DDI_STRUCTURE_LE_ACC, /* This is a little-endian device */ 252 DDI_STRICTORDER_ACC 253 }; 254 255 /* DMA access attributes for data: NOT to be byte swapped. */ 256 static ddi_device_acc_attr_t data_accattr = { 257 DDI_DEVICE_ATTR_V0, 258 DDI_NEVERSWAP_ACC, 259 DDI_STRICTORDER_ACC 260 }; 261 262 unsigned char xnf_broadcastaddr[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; 263 int xnf_diagnose = 0; /* Patchable global for diagnostic purposes */ 264 265 DDI_DEFINE_STREAM_OPS(xnf_dev_ops, nulldev, nulldev, xnf_attach, xnf_detach, 266 nodev, NULL, D_MP, NULL); 267 268 static struct modldrv xnf_modldrv = { 269 &mod_driverops, /* Type of module. 
This one is a driver */ 270 IDENT " %I%", /* short description */ 271 &xnf_dev_ops /* driver specific ops */ 272 }; 273 274 static struct modlinkage modlinkage = { 275 MODREV_1, &xnf_modldrv, NULL 276 }; 277 278 int 279 _init(void) 280 { 281 int r; 282 283 mac_init_ops(&xnf_dev_ops, "xnf"); 284 r = mod_install(&modlinkage); 285 if (r != DDI_SUCCESS) 286 mac_fini_ops(&xnf_dev_ops); 287 288 return (r); 289 } 290 291 int 292 _fini(void) 293 { 294 return (EBUSY); /* XXPV dme: should be removable */ 295 } 296 297 int 298 _info(struct modinfo *modinfop) 299 { 300 return (mod_info(&modlinkage, modinfop)); 301 } 302 303 static int 304 xnf_setup_rings(xnf_t *xnfp) 305 { 306 int ix, err; 307 RING_IDX i; 308 struct xnf_buffer_desc *bdesc, *rbp; 309 struct xenbus_device *xsd; 310 domid_t oeid; 311 312 oeid = xvdi_get_oeid(xnfp->xnf_devinfo); 313 xsd = xvdi_get_xsd(xnfp->xnf_devinfo); 314 315 if (xnfp->xnf_tx_ring_ref != GRANT_INVALID_REF) 316 gnttab_end_foreign_access(xnfp->xnf_tx_ring_ref, 0, 0); 317 318 err = gnttab_grant_foreign_access(oeid, 319 xnf_btop(pa_to_ma(xnfp->xnf_tx_ring_phys_addr)), 0); 320 if (err <= 0) { 321 err = -err; 322 xenbus_dev_error(xsd, err, "granting access to tx ring page"); 323 goto out; 324 } 325 xnfp->xnf_tx_ring_ref = (grant_ref_t)err; 326 327 if (xnfp->xnf_rx_ring_ref != GRANT_INVALID_REF) 328 gnttab_end_foreign_access(xnfp->xnf_rx_ring_ref, 0, 0); 329 330 err = gnttab_grant_foreign_access(oeid, 331 xnf_btop(pa_to_ma(xnfp->xnf_rx_ring_phys_addr)), 0); 332 if (err <= 0) { 333 err = -err; 334 xenbus_dev_error(xsd, err, "granting access to rx ring page"); 335 goto out; 336 } 337 xnfp->xnf_rx_ring_ref = (grant_ref_t)err; 338 339 340 mutex_enter(&xnfp->xnf_intrlock); 341 342 /* 343 * Cleanup the TX ring. We just clean up any valid tx_pktinfo structs 344 * and reset the ring. Note that this can lose packets after a resume, 345 * but we expect to stagger on. 346 */ 347 mutex_enter(&xnfp->xnf_txlock); 348 349 for (i = 0; i < xnfp->xnf_n_tx; i++) { 350 struct tx_pktinfo *txp = &xnfp->xnf_tx_pkt_info[i]; 351 352 txp->id = i + 1; 353 354 if (txp->grant_ref == GRANT_INVALID_REF) { 355 ASSERT(txp->mp == NULL); 356 ASSERT(txp->bdesc == NULL); 357 continue; 358 } 359 360 if (gnttab_query_foreign_access(txp->grant_ref) != 0) 361 panic("tx grant still in use by backend domain"); 362 363 freemsg(txp->mp); 364 txp->mp = NULL; 365 366 (void) ddi_dma_unbind_handle(txp->dma_handle); 367 368 if (txp->bdesc != NULL) { 369 xnf_free_tx_buffer(txp->bdesc); 370 txp->bdesc = NULL; 371 } 372 373 (void) gnttab_end_foreign_access_ref(txp->grant_ref, 374 xnfp->xnf_tx_pages_readonly); 375 gnttab_release_grant_reference(&xnfp->xnf_gref_tx_head, 376 txp->grant_ref); 377 txp->grant_ref = GRANT_INVALID_REF; 378 } 379 380 xnfp->xnf_tx_pkt_id_list = 0; 381 xnfp->xnf_tx_ring.rsp_cons = 0; 382 xnfp->xnf_tx_ring.sring->req_prod = 0; 383 xnfp->xnf_tx_ring.sring->rsp_prod = 0; 384 xnfp->xnf_tx_ring.sring->rsp_event = 1; 385 386 mutex_exit(&xnfp->xnf_txlock); 387 388 /* 389 * Rebuild the RX ring. We have to rebuild the RX ring because some of 390 * our pages are currently flipped out/granted so we can't just free 391 * the RX buffers. Reclaim any unprocessed recv buffers, they won't be 392 * useable anyway since the mfn's they refer to are no longer valid. 393 * Grant the backend domain access to each hung rx buffer. 
394 */ 395 i = xnfp->xnf_rx_ring.rsp_cons; 396 while (i++ != xnfp->xnf_rx_ring.sring->req_prod) { 397 volatile netif_rx_request_t *rxrp; 398 399 rxrp = RING_GET_REQUEST(&xnfp->xnf_rx_ring, i); 400 ix = rxrp - RING_GET_REQUEST(&xnfp->xnf_rx_ring, 0); 401 rbp = xnfp->xnf_rxpkt_bufptr[ix]; 402 if (rbp != NULL) { 403 grant_ref_t ref = rbp->grant_ref; 404 405 ASSERT(ref != GRANT_INVALID_REF); 406 if (xnfp->xnf_rx_hvcopy) { 407 pfn_t pfn = xnf_btop(rbp->buf_phys); 408 mfn_t mfn = pfn_to_mfn(pfn); 409 410 gnttab_grant_foreign_access_ref(ref, oeid, 411 mfn, 0); 412 } else { 413 gnttab_grant_foreign_transfer_ref(ref, oeid); 414 } 415 rxrp->id = ix; 416 rxrp->gref = ref; 417 } 418 } 419 420 /* 421 * Reset the ring pointers to initial state. 422 * Hang buffers for any empty ring slots. 423 */ 424 xnfp->xnf_rx_ring.rsp_cons = 0; 425 xnfp->xnf_rx_ring.sring->req_prod = 0; 426 xnfp->xnf_rx_ring.sring->rsp_prod = 0; 427 xnfp->xnf_rx_ring.sring->rsp_event = 1; 428 for (i = 0; i < NET_RX_RING_SIZE; i++) { 429 xnfp->xnf_rx_ring.req_prod_pvt = i; 430 if (xnfp->xnf_rxpkt_bufptr[i] != NULL) 431 continue; 432 if ((bdesc = xnf_get_buffer(xnfp)) == NULL) 433 break; 434 rx_buffer_hang(xnfp, bdesc); 435 } 436 xnfp->xnf_rx_ring.req_prod_pvt = i; 437 /* LINTED: constant in conditional context */ 438 RING_PUSH_REQUESTS(&xnfp->xnf_rx_ring); 439 440 mutex_exit(&xnfp->xnf_intrlock); 441 442 return (0); 443 444 out: 445 if (xnfp->xnf_tx_ring_ref != GRANT_INVALID_REF) 446 gnttab_end_foreign_access(xnfp->xnf_tx_ring_ref, 0, 0); 447 xnfp->xnf_tx_ring_ref = GRANT_INVALID_REF; 448 449 if (xnfp->xnf_rx_ring_ref != GRANT_INVALID_REF) 450 gnttab_end_foreign_access(xnfp->xnf_rx_ring_ref, 0, 0); 451 xnfp->xnf_rx_ring_ref = GRANT_INVALID_REF; 452 453 return (err); 454 } 455 456 457 /* Called when the upper layers free a message we passed upstream */ 458 static void 459 xnf_copy_rcv_complete(struct xnf_buffer_desc *bdesc) 460 { 461 (void) ddi_dma_unbind_handle(bdesc->dma_handle); 462 ddi_dma_mem_free(&bdesc->acc_handle); 463 ddi_dma_free_handle(&bdesc->dma_handle); 464 kmem_free(bdesc, sizeof (*bdesc)); 465 } 466 467 468 /* 469 * Connect driver to back end, called to set up communication with 470 * back end driver both initially and on resume after restore/migrate. 471 */ 472 void 473 xnf_be_connect(xnf_t *xnfp) 474 { 475 char mac[ETHERADDRL * 3]; 476 const char *message; 477 xenbus_transaction_t xbt; 478 struct xenbus_device *xsd; 479 char *xsname; 480 int err, be_no_cksum_offload; 481 482 ASSERT(!xnfp->xnf_connected); 483 484 xsd = xvdi_get_xsd(xnfp->xnf_devinfo); 485 xsname = xvdi_get_xsname(xnfp->xnf_devinfo); 486 487 err = xenbus_scanf(XBT_NULL, xvdi_get_oename(xnfp->xnf_devinfo), "mac", 488 "%s", (char *)&mac[0]); 489 if (err != 0) { 490 /* 491 * bad: we're supposed to be set up with a proper mac 492 * addr. 
at this point 493 */ 494 cmn_err(CE_WARN, "%s%d: no mac address", 495 ddi_driver_name(xnfp->xnf_devinfo), 496 ddi_get_instance(xnfp->xnf_devinfo)); 497 return; 498 } 499 500 if (ether_aton(mac, xnfp->xnf_mac_addr) != ETHERADDRL) { 501 err = ENOENT; 502 xenbus_dev_error(xsd, ENOENT, "parsing %s/mac", xsname); 503 return; 504 } 505 506 err = xnf_setup_rings(xnfp); 507 if (err != 0) { 508 cmn_err(CE_WARN, "failed to set up tx/rx rings"); 509 xenbus_dev_error(xsd, err, "setting up ring"); 510 return; 511 } 512 513 err = xenbus_scanf(XBT_NULL, xvdi_get_oename(xnfp->xnf_devinfo), 514 "feature-no-csum-offload", "%d", &be_no_cksum_offload); 515 /* 516 * If we fail to read the store we assume that the key is 517 * absent, implying an older domain at the far end. Older 518 * domains always support checksum offload. 519 */ 520 if (err != 0) 521 be_no_cksum_offload = 0; 522 /* 523 * If the far end cannot do checksum offload or we do not wish 524 * to do it, disable it. 525 */ 526 if ((be_no_cksum_offload == 1) || !xnfp->xnf_cksum_offload) 527 xnfp->xnf_cksum_offload = B_FALSE; 528 529 again: 530 err = xenbus_transaction_start(&xbt); 531 if (err != 0) { 532 xenbus_dev_error(xsd, EIO, "starting transaction"); 533 return; 534 } 535 536 err = xenbus_printf(xbt, xsname, "tx-ring-ref", "%u", 537 xnfp->xnf_tx_ring_ref); 538 if (err != 0) { 539 message = "writing tx ring-ref"; 540 goto abort_transaction; 541 } 542 543 err = xenbus_printf(xbt, xsname, "rx-ring-ref", "%u", 544 xnfp->xnf_rx_ring_ref); 545 if (err != 0) { 546 message = "writing rx ring-ref"; 547 goto abort_transaction; 548 } 549 550 err = xenbus_printf(xbt, xsname, "event-channel", "%u", 551 xnfp->xnf_evtchn); 552 if (err != 0) { 553 message = "writing event-channel"; 554 goto abort_transaction; 555 } 556 557 err = xenbus_printf(xbt, xsname, "feature-rx-notify", "%d", 1); 558 if (err != 0) { 559 message = "writing feature-rx-notify"; 560 goto abort_transaction; 561 } 562 563 if (!xnfp->xnf_tx_pages_readonly) { 564 err = xenbus_printf(xbt, xsname, "feature-tx-writable", 565 "%d", 1); 566 if (err != 0) { 567 message = "writing feature-tx-writable"; 568 goto abort_transaction; 569 } 570 } 571 572 err = xenbus_printf(xbt, xsname, "feature-no-csum-offload", "%d", 573 xnfp->xnf_cksum_offload ? 0 : 1); 574 if (err != 0) { 575 message = "writing feature-no-csum-offload"; 576 goto abort_transaction; 577 } 578 err = xenbus_printf(xbt, xsname, "request-rx-copy", "%d", 579 xnfp->xnf_rx_hvcopy ? 1 : 0); 580 if (err != 0) { 581 message = "writing request-rx-copy"; 582 goto abort_transaction; 583 } 584 585 err = xenbus_printf(xbt, xsname, "state", "%d", XenbusStateConnected); 586 if (err != 0) { 587 message = "writing frontend XenbusStateConnected"; 588 goto abort_transaction; 589 } 590 591 err = xenbus_transaction_end(xbt, 0); 592 if (err != 0) { 593 if (err == EAGAIN) 594 goto again; 595 xenbus_dev_error(xsd, err, "completing transaction"); 596 } 597 598 return; 599 600 abort_transaction: 601 (void) xenbus_transaction_end(xbt, 1); 602 xenbus_dev_error(xsd, err, "%s", message); 603 } 604 605 /* 606 * attach(9E) -- Attach a device to the system 607 * 608 * Called once for each board successfully probed. 
609 */ 610 static int 611 xnf_attach(dev_info_t *devinfo, ddi_attach_cmd_t cmd) 612 { 613 mac_register_t *macp; 614 xnf_t *xnfp; 615 int err; 616 617 #ifdef XNF_DEBUG 618 if (xnfdebug & XNF_DEBUG_DDI) 619 printf("xnf%d: attach(0x%p)\n", ddi_get_instance(devinfo), 620 (void *)devinfo); 621 #endif 622 623 switch (cmd) { 624 case DDI_RESUME: 625 xnfp = ddi_get_driver_private(devinfo); 626 627 (void) xvdi_resume(devinfo); 628 (void) xvdi_alloc_evtchn(devinfo); 629 xnfp->xnf_evtchn = xvdi_get_evtchn(devinfo); 630 #ifdef XPV_HVM_DRIVER 631 ec_bind_evtchn_to_handler(xnfp->xnf_evtchn, IPL_VIF, xnf_intr, 632 xnfp); 633 #else 634 (void) ddi_add_intr(devinfo, 0, NULL, NULL, xnf_intr, 635 (caddr_t)xnfp); 636 #endif 637 xnf_be_connect(xnfp); 638 /* 639 * Our MAC address may have changed if we're resuming: 640 * - on a different host 641 * - on the same one and got a different MAC address 642 * because we didn't specify one of our own. 643 * so it's useful to claim that it changed in order that 644 * IP send out a gratuitous ARP. 645 */ 646 mac_unicst_update(xnfp->xnf_mh, xnfp->xnf_mac_addr); 647 return (DDI_SUCCESS); 648 649 case DDI_ATTACH: 650 break; 651 652 default: 653 return (DDI_FAILURE); 654 } 655 656 /* 657 * Allocate gld_mac_info_t and xnf_instance structures 658 */ 659 macp = mac_alloc(MAC_VERSION); 660 if (macp == NULL) 661 return (DDI_FAILURE); 662 xnfp = kmem_zalloc(sizeof (*xnfp), KM_SLEEP); 663 664 macp->m_dip = devinfo; 665 macp->m_driver = xnfp; 666 xnfp->xnf_devinfo = devinfo; 667 668 macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER; 669 macp->m_src_addr = xnfp->xnf_mac_addr; 670 macp->m_callbacks = &xnf_callbacks; 671 macp->m_min_sdu = 0; 672 macp->m_max_sdu = XNF_MAXPKT; 673 674 xnfp->xnf_running = B_FALSE; 675 xnfp->xnf_connected = B_FALSE; 676 xnfp->xnf_cksum_offload = xnf_cksum_offload; 677 xnfp->xnf_tx_pages_readonly = xnf_tx_pages_readonly; 678 679 xnfp->xnf_rx_hvcopy = xnf_hvcopy_peer_status(devinfo) && xnf_rx_hvcopy; 680 #ifdef XPV_HVM_DRIVER 681 if (!xnfp->xnf_rx_hvcopy) { 682 cmn_err(CE_WARN, "The xnf driver requires a dom0 that " 683 "supports 'feature-rx-copy'"); 684 goto failure; 685 } 686 #endif 687 688 /* 689 * Get the iblock cookie with which to initialize the mutexes. 690 */ 691 if (ddi_get_iblock_cookie(devinfo, 0, &xnfp->xnf_icookie) 692 != DDI_SUCCESS) 693 goto failure; 694 /* 695 * Driver locking strategy: the txlock protects all paths 696 * through the driver, except the interrupt thread. 697 * If the interrupt thread needs to do something which could 698 * affect the operation of any other part of the driver, 699 * it needs to acquire the txlock mutex. 
700 */ 701 mutex_init(&xnfp->xnf_tx_buf_mutex, 702 NULL, MUTEX_DRIVER, xnfp->xnf_icookie); 703 mutex_init(&xnfp->xnf_rx_buf_mutex, 704 NULL, MUTEX_DRIVER, xnfp->xnf_icookie); 705 mutex_init(&xnfp->xnf_txlock, 706 NULL, MUTEX_DRIVER, xnfp->xnf_icookie); 707 mutex_init(&xnfp->xnf_intrlock, 708 NULL, MUTEX_DRIVER, xnfp->xnf_icookie); 709 cv_init(&xnfp->xnf_cv, NULL, CV_DEFAULT, NULL); 710 711 if (gnttab_alloc_grant_references(NET_TX_RING_SIZE, 712 &xnfp->xnf_gref_tx_head) < 0) { 713 cmn_err(CE_WARN, "xnf%d: can't alloc tx grant refs", 714 ddi_get_instance(xnfp->xnf_devinfo)); 715 goto failure_1; 716 } 717 if (gnttab_alloc_grant_references(NET_RX_RING_SIZE, 718 &xnfp->xnf_gref_rx_head) < 0) { 719 cmn_err(CE_WARN, "xnf%d: can't alloc rx grant refs", 720 ddi_get_instance(xnfp->xnf_devinfo)); 721 goto failure_1; 722 } 723 if (xnf_alloc_dma_resources(xnfp) == DDI_FAILURE) { 724 cmn_err(CE_WARN, "xnf%d: failed to allocate and initialize " 725 "driver data structures", 726 ddi_get_instance(xnfp->xnf_devinfo)); 727 goto failure_1; 728 } 729 730 xnfp->xnf_rx_ring.sring->rsp_event = 731 xnfp->xnf_tx_ring.sring->rsp_event = 1; 732 733 xnfp->xnf_tx_ring_ref = GRANT_INVALID_REF; 734 xnfp->xnf_rx_ring_ref = GRANT_INVALID_REF; 735 736 /* set driver private pointer now */ 737 ddi_set_driver_private(devinfo, xnfp); 738 739 if (xvdi_add_event_handler(devinfo, XS_OE_STATE, oe_state_change) 740 != DDI_SUCCESS) 741 goto failure_1; 742 743 if (!xnf_kstat_init(xnfp)) 744 goto failure_2; 745 746 /* 747 * Allocate an event channel, add the interrupt handler and 748 * bind it to the event channel. 749 */ 750 (void) xvdi_alloc_evtchn(devinfo); 751 xnfp->xnf_evtchn = xvdi_get_evtchn(devinfo); 752 #ifdef XPV_HVM_DRIVER 753 ec_bind_evtchn_to_handler(xnfp->xnf_evtchn, IPL_VIF, xnf_intr, xnfp); 754 #else 755 (void) ddi_add_intr(devinfo, 0, NULL, NULL, xnf_intr, (caddr_t)xnfp); 756 #endif 757 758 /* 759 * connect to the backend 760 */ 761 xnf_be_connect(xnfp); 762 763 err = mac_register(macp, &xnfp->xnf_mh); 764 mac_free(macp); 765 macp = NULL; 766 if (err != 0) 767 goto failure_3; 768 769 return (DDI_SUCCESS); 770 771 failure_3: 772 kstat_delete(xnfp->xnf_kstat_aux); 773 774 failure_2: 775 xvdi_remove_event_handler(devinfo, XS_OE_STATE); 776 #ifdef XPV_HVM_DRIVER 777 ec_unbind_evtchn(xnfp->xnf_evtchn); 778 #else 779 ddi_remove_intr(devinfo, 0, xnfp->xnf_icookie); 780 #endif 781 xnfp->xnf_evtchn = INVALID_EVTCHN; 782 783 failure_1: 784 xnf_release_dma_resources(xnfp); 785 cv_destroy(&xnfp->xnf_cv); 786 mutex_destroy(&xnfp->xnf_rx_buf_mutex); 787 mutex_destroy(&xnfp->xnf_txlock); 788 mutex_destroy(&xnfp->xnf_intrlock); 789 790 failure: 791 kmem_free(xnfp, sizeof (*xnfp)); 792 if (macp != NULL) 793 mac_free(macp); 794 795 return (DDI_FAILURE); 796 } 797 798 /* detach(9E) -- Detach a device from the system */ 799 static int 800 xnf_detach(dev_info_t *devinfo, ddi_detach_cmd_t cmd) 801 { 802 xnf_t *xnfp; /* Our private device info */ 803 int i; 804 805 #ifdef XNF_DEBUG 806 if (xnfdebug & XNF_DEBUG_DDI) 807 printf("xnf_detach(0x%p)\n", (void *)devinfo); 808 #endif 809 810 xnfp = ddi_get_driver_private(devinfo); 811 812 switch (cmd) { 813 case DDI_SUSPEND: 814 #ifdef XPV_HVM_DRIVER 815 ec_unbind_evtchn(xnfp->xnf_evtchn); 816 #else 817 ddi_remove_intr(devinfo, 0, xnfp->xnf_icookie); 818 #endif 819 820 xvdi_suspend(devinfo); 821 822 mutex_enter(&xnfp->xnf_intrlock); 823 mutex_enter(&xnfp->xnf_txlock); 824 825 xnfp->xnf_evtchn = INVALID_EVTCHN; 826 xnfp->xnf_connected = B_FALSE; 827 mutex_exit(&xnfp->xnf_txlock); 828 
mutex_exit(&xnfp->xnf_intrlock); 829 return (DDI_SUCCESS); 830 831 case DDI_DETACH: 832 break; 833 834 default: 835 return (DDI_FAILURE); 836 } 837 838 if (xnfp->xnf_connected) 839 return (DDI_FAILURE); 840 841 /* Wait for receive buffers to be returned; give up after 5 seconds */ 842 i = 50; 843 844 mutex_enter(&xnfp->xnf_rx_buf_mutex); 845 while (xnfp->xnf_rx_bufs_outstanding > 0) { 846 mutex_exit(&xnfp->xnf_rx_buf_mutex); 847 delay(drv_usectohz(100000)); 848 if (--i == 0) { 849 cmn_err(CE_WARN, 850 "xnf%d: never reclaimed all the " 851 "receive buffers. Still have %d " 852 "buffers outstanding.", 853 ddi_get_instance(xnfp->xnf_devinfo), 854 xnfp->xnf_rx_bufs_outstanding); 855 return (DDI_FAILURE); 856 } 857 mutex_enter(&xnfp->xnf_rx_buf_mutex); 858 } 859 mutex_exit(&xnfp->xnf_rx_buf_mutex); 860 861 kstat_delete(xnfp->xnf_kstat_aux); 862 863 if (mac_unregister(xnfp->xnf_mh) != 0) 864 return (DDI_FAILURE); 865 866 /* Stop the receiver */ 867 xnf_stop(xnfp); 868 869 xvdi_remove_event_handler(devinfo, XS_OE_STATE); 870 871 /* Remove the interrupt */ 872 #ifdef XPV_HVM_DRIVER 873 ec_unbind_evtchn(xnfp->xnf_evtchn); 874 #else 875 ddi_remove_intr(devinfo, 0, xnfp->xnf_icookie); 876 #endif 877 878 /* Release any pending xmit mblks */ 879 xnf_release_mblks(xnfp); 880 881 /* Release all DMA resources */ 882 xnf_release_dma_resources(xnfp); 883 884 cv_destroy(&xnfp->xnf_cv); 885 mutex_destroy(&xnfp->xnf_rx_buf_mutex); 886 mutex_destroy(&xnfp->xnf_txlock); 887 mutex_destroy(&xnfp->xnf_intrlock); 888 889 kmem_free(xnfp, sizeof (*xnfp)); 890 891 return (DDI_SUCCESS); 892 } 893 894 /* 895 * xnf_set_mac_addr() -- set the physical network address on the board. 896 */ 897 /*ARGSUSED*/ 898 static int 899 xnf_set_mac_addr(void *arg, const uint8_t *macaddr) 900 { 901 xnf_t *xnfp = arg; 902 903 #ifdef XNF_DEBUG 904 if (xnfdebug & XNF_DEBUG_TRACE) 905 printf("xnf%d: set_mac_addr(0x%p): " 906 "%02x:%02x:%02x:%02x:%02x:%02x\n", 907 ddi_get_instance(xnfp->xnf_devinfo), 908 (void *)xnfp, macaddr[0], macaddr[1], macaddr[2], 909 macaddr[3], macaddr[4], macaddr[5]); 910 #endif 911 /* 912 * We can't set our macaddr. 913 * 914 * XXPV dme: Why not? 915 */ 916 return (ENOTSUP); 917 } 918 919 /* 920 * xnf_set_multicast() -- set (enable) or disable a multicast address. 921 * 922 * Program the hardware to enable/disable the multicast address 923 * in "mcast". Enable if "add" is true, disable if false. 924 */ 925 /*ARGSUSED*/ 926 static int 927 xnf_set_multicast(void *arg, boolean_t add, const uint8_t *mca) 928 { 929 xnf_t *xnfp = arg; 930 931 #ifdef XNF_DEBUG 932 if (xnfdebug & XNF_DEBUG_TRACE) 933 printf("xnf%d set_multicast(0x%p): " 934 "%02x:%02x:%02x:%02x:%02x:%02x\n", 935 ddi_get_instance(xnfp->xnf_devinfo), 936 (void *)xnfp, mca[0], mca[1], mca[2], 937 mca[3], mca[4], mca[5]); 938 #endif 939 940 /* 941 * XXPV dme: Ideally we'd relay the address to the backend for 942 * enabling. The protocol doesn't support that (interesting 943 * extension), so we simply succeed and hope that the relevant 944 * packets are going to arrive. 945 * 946 * If protocol support is added for enable/disable then we'll 947 * need to keep a list of those in use and re-add on resume. 948 */ 949 return (0); 950 } 951 952 /* 953 * xnf_set_promiscuous() -- set or reset promiscuous mode on the board 954 * 955 * Program the hardware to enable/disable promiscuous mode. 
956 */ 957 /*ARGSUSED*/ 958 static int 959 xnf_set_promiscuous(void *arg, boolean_t on) 960 { 961 xnf_t *xnfp = arg; 962 963 #ifdef XNF_DEBUG 964 if (xnfdebug & XNF_DEBUG_TRACE) 965 printf("xnf%d set_promiscuous(0x%p, %x)\n", 966 ddi_get_instance(xnfp->xnf_devinfo), 967 (void *)xnfp, on); 968 #endif 969 /* 970 * We can't really do this, but we pretend that we can in 971 * order that snoop will work. 972 */ 973 return (0); 974 } 975 976 /* 977 * Clean buffers that we have responses for from the transmit ring. 978 */ 979 static int 980 xnf_clean_tx_ring(xnf_t *xnfp) 981 { 982 RING_IDX next_resp, i; 983 struct tx_pktinfo *reap; 984 int id; 985 grant_ref_t ref; 986 987 ASSERT(MUTEX_HELD(&xnfp->xnf_txlock)); 988 989 do { 990 /* 991 * index of next transmission ack 992 */ 993 next_resp = xnfp->xnf_tx_ring.sring->rsp_prod; 994 membar_consumer(); 995 /* 996 * Clean tx packets from ring that we have responses for 997 */ 998 for (i = xnfp->xnf_tx_ring.rsp_cons; i != next_resp; i++) { 999 id = RING_GET_RESPONSE(&xnfp->xnf_tx_ring, i)->id; 1000 reap = &xnfp->xnf_tx_pkt_info[id]; 1001 ref = reap->grant_ref; 1002 /* 1003 * Return id to free list 1004 */ 1005 reap->id = xnfp->xnf_tx_pkt_id_list; 1006 xnfp->xnf_tx_pkt_id_list = id; 1007 if (gnttab_query_foreign_access(ref) != 0) 1008 panic("tx grant still in use " 1009 "by backend domain"); 1010 (void) ddi_dma_unbind_handle(reap->dma_handle); 1011 (void) gnttab_end_foreign_access_ref(ref, 1012 xnfp->xnf_tx_pages_readonly); 1013 gnttab_release_grant_reference(&xnfp->xnf_gref_tx_head, 1014 ref); 1015 freemsg(reap->mp); 1016 reap->mp = NULL; 1017 reap->grant_ref = GRANT_INVALID_REF; 1018 if (reap->bdesc != NULL) 1019 xnf_free_tx_buffer(reap->bdesc); 1020 reap->bdesc = NULL; 1021 } 1022 xnfp->xnf_tx_ring.rsp_cons = next_resp; 1023 membar_enter(); 1024 } while (next_resp != xnfp->xnf_tx_ring.sring->rsp_prod); 1025 return (NET_TX_RING_SIZE - (xnfp->xnf_tx_ring.sring->req_prod - 1026 next_resp)); 1027 } 1028 1029 /* 1030 * If we need to pull up data from either a packet that crosses a page 1031 * boundary or consisting of multiple mblks, do it here. We allocate 1032 * a page aligned buffer and copy the data into it. The header for the 1033 * allocated buffer is returned. (which is also allocated here) 1034 */ 1035 static struct xnf_buffer_desc * 1036 xnf_pullupmsg(xnf_t *xnfp, mblk_t *mp) 1037 { 1038 struct xnf_buffer_desc *bdesc; 1039 mblk_t *mptr; 1040 caddr_t bp; 1041 int len; 1042 1043 /* 1044 * get a xmit buffer from the xmit buffer pool 1045 */ 1046 mutex_enter(&xnfp->xnf_rx_buf_mutex); 1047 bdesc = xnf_get_tx_buffer(xnfp); 1048 mutex_exit(&xnfp->xnf_rx_buf_mutex); 1049 if (bdesc == NULL) 1050 return (bdesc); 1051 /* 1052 * Copy the data into the buffer 1053 */ 1054 xnfp->xnf_stat_tx_pullup++; 1055 bp = bdesc->buf; 1056 for (mptr = mp; mptr != NULL; mptr = mptr->b_cont) { 1057 len = mptr->b_wptr - mptr->b_rptr; 1058 bcopy(mptr->b_rptr, bp, len); 1059 bp += len; 1060 } 1061 return (bdesc); 1062 } 1063 1064 /* 1065 * xnf_send_one() -- send a packet 1066 * 1067 * Called when a packet is ready to be transmitted. A pointer to an 1068 * M_DATA message that contains the packet is passed to this routine. 1069 * At least the complete LLC header is contained in the message's 1070 * first message block, and the remainder of the packet is contained 1071 * within additional M_DATA message blocks linked to the first 1072 * message block. 
1073 * 1074 */ 1075 static boolean_t 1076 xnf_send_one(xnf_t *xnfp, mblk_t *mp) 1077 { 1078 struct xnf_buffer_desc *xmitbuf; 1079 struct tx_pktinfo *txp_info; 1080 mblk_t *mptr; 1081 ddi_dma_cookie_t dma_cookie; 1082 RING_IDX slot, txs_out; 1083 int length = 0, i, pktlen = 0, rc, tx_id; 1084 int tx_ring_freespace, page_oops; 1085 uint_t ncookies; 1086 volatile netif_tx_request_t *txrp; 1087 caddr_t bufaddr; 1088 grant_ref_t ref; 1089 unsigned long mfn; 1090 uint32_t pflags; 1091 domid_t oeid; 1092 1093 #ifdef XNF_DEBUG 1094 if (xnfdebug & XNF_DEBUG_SEND) 1095 printf("xnf%d send(0x%p, 0x%p)\n", 1096 ddi_get_instance(xnfp->xnf_devinfo), 1097 (void *)xnfp, (void *)mp); 1098 #endif 1099 1100 ASSERT(mp != NULL); 1101 ASSERT(mp->b_next == NULL); 1102 ASSERT(MUTEX_HELD(&xnfp->xnf_txlock)); 1103 1104 tx_ring_freespace = xnf_clean_tx_ring(xnfp); 1105 ASSERT(tx_ring_freespace >= 0); 1106 1107 oeid = xvdi_get_oeid(xnfp->xnf_devinfo); 1108 xnfp->xnf_stat_tx_attempt++; 1109 /* 1110 * If there are no xmit ring slots available, return. 1111 */ 1112 if (tx_ring_freespace == 0) { 1113 xnfp->xnf_stat_tx_defer++; 1114 return (B_FALSE); /* Send should be retried */ 1115 } 1116 1117 slot = xnfp->xnf_tx_ring.sring->req_prod; 1118 /* Count the number of mblks in message and compute packet size */ 1119 for (i = 0, mptr = mp; mptr != NULL; mptr = mptr->b_cont, i++) 1120 pktlen += (mptr->b_wptr - mptr->b_rptr); 1121 1122 /* Make sure packet isn't too large */ 1123 if (pktlen > XNF_FRAMESIZE) { 1124 cmn_err(CE_WARN, "xnf%d: large packet %d bytes", 1125 ddi_get_instance(xnfp->xnf_devinfo), pktlen); 1126 freemsg(mp); 1127 return (B_FALSE); 1128 } 1129 1130 /* 1131 * Test if we cross a page boundary with our buffer 1132 */ 1133 page_oops = (i == 1) && 1134 (xnf_btop((size_t)mp->b_rptr) != 1135 xnf_btop((size_t)(mp->b_rptr + pktlen))); 1136 /* 1137 * XXPV - unfortunately, the Xen virtual net device currently 1138 * doesn't support multiple packet frags, so this will always 1139 * end up doing the pullup if we got more than one packet. 1140 */ 1141 if (i > xnf_max_tx_frags || page_oops) { 1142 if (page_oops) 1143 xnfp->xnf_stat_tx_pagebndry++; 1144 if ((xmitbuf = xnf_pullupmsg(xnfp, mp)) == NULL) { 1145 /* could not allocate resources? 
*/ 1146 #ifdef XNF_DEBUG 1147 cmn_err(CE_WARN, "xnf%d: pullupmsg failed", 1148 ddi_get_instance(xnfp->xnf_devinfo)); 1149 #endif 1150 xnfp->xnf_stat_tx_defer++; 1151 return (B_FALSE); /* Retry send */ 1152 } 1153 bufaddr = xmitbuf->buf; 1154 } else { 1155 xmitbuf = NULL; 1156 bufaddr = (caddr_t)mp->b_rptr; 1157 } 1158 1159 /* set up data descriptor */ 1160 length = pktlen; 1161 1162 /* 1163 * Get packet id from free list 1164 */ 1165 tx_id = xnfp->xnf_tx_pkt_id_list; 1166 ASSERT(tx_id < NET_TX_RING_SIZE); 1167 txp_info = &xnfp->xnf_tx_pkt_info[tx_id]; 1168 xnfp->xnf_tx_pkt_id_list = txp_info->id; 1169 txp_info->id = tx_id; 1170 1171 /* Prepare for DMA mapping of tx buffer(s) */ 1172 rc = ddi_dma_addr_bind_handle(txp_info->dma_handle, 1173 NULL, bufaddr, length, DDI_DMA_WRITE | DDI_DMA_STREAMING, 1174 DDI_DMA_DONTWAIT, 0, &dma_cookie, &ncookies); 1175 if (rc != DDI_DMA_MAPPED) { 1176 ASSERT(rc != DDI_DMA_INUSE); 1177 ASSERT(rc != DDI_DMA_PARTIAL_MAP); 1178 /* 1179 * Return id to free list 1180 */ 1181 txp_info->id = xnfp->xnf_tx_pkt_id_list; 1182 xnfp->xnf_tx_pkt_id_list = tx_id; 1183 if (rc == DDI_DMA_NORESOURCES) { 1184 xnfp->xnf_stat_tx_defer++; 1185 return (B_FALSE); /* Retry later */ 1186 } 1187 #ifdef XNF_DEBUG 1188 cmn_err(CE_WARN, "xnf%d: bind_handle failed (%x)", 1189 ddi_get_instance(xnfp->xnf_devinfo), rc); 1190 #endif 1191 return (B_FALSE); 1192 } 1193 1194 ASSERT(ncookies == 1); 1195 ref = gnttab_claim_grant_reference(&xnfp->xnf_gref_tx_head); 1196 ASSERT((signed short)ref >= 0); 1197 mfn = xnf_btop(pa_to_ma((paddr_t)dma_cookie.dmac_laddress)); 1198 gnttab_grant_foreign_access_ref(ref, oeid, mfn, 1199 xnfp->xnf_tx_pages_readonly); 1200 txp_info->grant_ref = ref; 1201 txrp = RING_GET_REQUEST(&xnfp->xnf_tx_ring, slot); 1202 txrp->gref = ref; 1203 txrp->size = dma_cookie.dmac_size; 1204 txrp->offset = (uintptr_t)bufaddr & PAGEOFFSET; 1205 txrp->id = tx_id; 1206 txrp->flags = 0; 1207 hcksum_retrieve(mp, NULL, NULL, NULL, NULL, NULL, NULL, &pflags); 1208 if (pflags != 0) { 1209 ASSERT(xnfp->xnf_cksum_offload); 1210 /* 1211 * If the local protocol stack requests checksum 1212 * offload we set the 'checksum blank' flag, 1213 * indicating to the peer that we need the checksum 1214 * calculated for us. 1215 * 1216 * We _don't_ set the validated flag, because we haven't 1217 * validated that the data and the checksum match. 1218 */ 1219 txrp->flags |= NETTXF_csum_blank; 1220 xnfp->xnf_stat_tx_cksum_deferred++; 1221 } 1222 membar_producer(); 1223 xnfp->xnf_tx_ring.sring->req_prod = slot + 1; 1224 1225 txp_info->mp = mp; 1226 txp_info->bdesc = xmitbuf; 1227 1228 txs_out = xnfp->xnf_tx_ring.sring->req_prod - 1229 xnfp->xnf_tx_ring.sring->rsp_prod; 1230 if (xnfp->xnf_tx_ring.sring->req_prod - xnfp->xnf_tx_ring.rsp_cons < 1231 XNF_TX_FREE_THRESH) { 1232 /* 1233 * The ring is getting full; Set up this packet 1234 * to cause an interrupt. 1235 */ 1236 xnfp->xnf_tx_ring.sring->rsp_event = 1237 xnfp->xnf_tx_ring.sring->rsp_prod + txs_out; 1238 } 1239 1240 xnfp->xnf_stat_opackets++; 1241 xnfp->xnf_stat_obytes += pktlen; 1242 1243 return (B_TRUE); /* successful transmit attempt */ 1244 } 1245 1246 mblk_t * 1247 xnf_send(void *arg, mblk_t *mp) 1248 { 1249 xnf_t *xnfp = arg; 1250 mblk_t *next; 1251 boolean_t sent_something = B_FALSE; 1252 1253 mutex_enter(&xnfp->xnf_txlock); 1254 1255 /* 1256 * Transmission attempts should be impossible without having 1257 * previously called xnf_start(). 
1258 */ 1259 ASSERT(xnfp->xnf_running); 1260 1261 /* 1262 * Wait for getting connected to the backend 1263 */ 1264 while (!xnfp->xnf_connected) { 1265 cv_wait(&xnfp->xnf_cv, &xnfp->xnf_txlock); 1266 } 1267 1268 while (mp != NULL) { 1269 next = mp->b_next; 1270 mp->b_next = NULL; 1271 1272 if (!xnf_send_one(xnfp, mp)) { 1273 mp->b_next = next; 1274 break; 1275 } 1276 1277 mp = next; 1278 sent_something = B_TRUE; 1279 } 1280 1281 if (sent_something) 1282 ec_notify_via_evtchn(xnfp->xnf_evtchn); 1283 1284 mutex_exit(&xnfp->xnf_txlock); 1285 1286 return (mp); 1287 } 1288 1289 /* 1290 * xnf_intr() -- ring interrupt service routine 1291 */ 1292 static uint_t 1293 xnf_intr(caddr_t arg) 1294 { 1295 xnf_t *xnfp = (xnf_t *)arg; 1296 int tx_ring_space; 1297 1298 mutex_enter(&xnfp->xnf_intrlock); 1299 1300 /* 1301 * If not connected to the peer or not started by the upper 1302 * layers we cannot usefully handle interrupts. 1303 */ 1304 if (!(xnfp->xnf_connected && xnfp->xnf_running)) { 1305 mutex_exit(&xnfp->xnf_intrlock); 1306 xnfp->xnf_stat_unclaimed_interrupts++; 1307 return (DDI_INTR_UNCLAIMED); 1308 } 1309 1310 #ifdef XNF_DEBUG 1311 if (xnfdebug & XNF_DEBUG_INT) 1312 printf("xnf%d intr(0x%p)\n", 1313 ddi_get_instance(xnfp->xnf_devinfo), (void *)xnfp); 1314 #endif 1315 if (RING_HAS_UNCONSUMED_RESPONSES(&xnfp->xnf_rx_ring)) { 1316 mblk_t *mp; 1317 1318 if (xnfp->xnf_rx_hvcopy) 1319 mp = xnf_process_hvcopy_recv(xnfp); 1320 else 1321 mp = xnf_process_recv(xnfp); 1322 1323 if (mp != NULL) 1324 mac_rx(xnfp->xnf_mh, xnfp->xnf_rx_handle, mp); 1325 } 1326 1327 /* 1328 * Is tx ring nearly full? 1329 */ 1330 #define inuse(r) ((r).sring->req_prod - (r).rsp_cons) 1331 1332 if ((NET_TX_RING_SIZE - inuse(xnfp->xnf_tx_ring)) < 1333 XNF_TX_FREE_THRESH) { 1334 /* 1335 * Yes, clean it and try to start any blocked xmit 1336 * streams. 1337 */ 1338 mutex_enter(&xnfp->xnf_txlock); 1339 tx_ring_space = xnf_clean_tx_ring(xnfp); 1340 mutex_exit(&xnfp->xnf_txlock); 1341 if (tx_ring_space > XNF_TX_FREE_THRESH) { 1342 mutex_exit(&xnfp->xnf_intrlock); 1343 mac_tx_update(xnfp->xnf_mh); 1344 mutex_enter(&xnfp->xnf_intrlock); 1345 } else { 1346 /* 1347 * Schedule another tx interrupt when we have 1348 * sent enough packets to cross the threshold. 1349 */ 1350 xnfp->xnf_tx_ring.sring->rsp_event = 1351 xnfp->xnf_tx_ring.sring->rsp_prod + 1352 XNF_TX_FREE_THRESH - tx_ring_space + 1; 1353 } 1354 } 1355 #undef inuse 1356 1357 xnfp->xnf_stat_interrupts++; 1358 mutex_exit(&xnfp->xnf_intrlock); 1359 return (DDI_INTR_CLAIMED); /* indicate that the interrupt was for us */ 1360 } 1361 1362 /* 1363 * xnf_start() -- start the board receiving and enable interrupts. 1364 */ 1365 static int 1366 xnf_start(void *arg) 1367 { 1368 xnf_t *xnfp = arg; 1369 1370 #ifdef XNF_DEBUG 1371 if (xnfdebug & XNF_DEBUG_TRACE) 1372 printf("xnf%d start(0x%p)\n", 1373 ddi_get_instance(xnfp->xnf_devinfo), (void *)xnfp); 1374 #endif 1375 1376 mutex_enter(&xnfp->xnf_intrlock); 1377 mutex_enter(&xnfp->xnf_txlock); 1378 1379 /* Accept packets from above. 
*/ 1380 xnfp->xnf_running = B_TRUE; 1381 1382 mutex_exit(&xnfp->xnf_txlock); 1383 mutex_exit(&xnfp->xnf_intrlock); 1384 1385 return (0); 1386 } 1387 1388 /* xnf_stop() - disable hardware */ 1389 static void 1390 xnf_stop(void *arg) 1391 { 1392 xnf_t *xnfp = arg; 1393 1394 #ifdef XNF_DEBUG 1395 if (xnfdebug & XNF_DEBUG_TRACE) 1396 printf("xnf%d stop(0x%p)\n", 1397 ddi_get_instance(xnfp->xnf_devinfo), (void *)xnfp); 1398 #endif 1399 1400 mutex_enter(&xnfp->xnf_intrlock); 1401 mutex_enter(&xnfp->xnf_txlock); 1402 1403 xnfp->xnf_running = B_FALSE; 1404 1405 mutex_exit(&xnfp->xnf_txlock); 1406 mutex_exit(&xnfp->xnf_intrlock); 1407 } 1408 1409 /* 1410 * Driver private functions follow 1411 */ 1412 1413 /* 1414 * Hang buffer on rx ring 1415 */ 1416 static void 1417 rx_buffer_hang(xnf_t *xnfp, struct xnf_buffer_desc *bdesc) 1418 { 1419 volatile netif_rx_request_t *reqp; 1420 RING_IDX hang_ix; 1421 grant_ref_t ref; 1422 domid_t oeid; 1423 1424 oeid = xvdi_get_oeid(xnfp->xnf_devinfo); 1425 1426 ASSERT(MUTEX_HELD(&xnfp->xnf_intrlock)); 1427 reqp = RING_GET_REQUEST(&xnfp->xnf_rx_ring, 1428 xnfp->xnf_rx_ring.req_prod_pvt); 1429 hang_ix = (RING_IDX) (reqp - RING_GET_REQUEST(&xnfp->xnf_rx_ring, 0)); 1430 ASSERT(xnfp->xnf_rxpkt_bufptr[hang_ix] == NULL); 1431 if (bdesc->grant_ref == GRANT_INVALID_REF) { 1432 ref = gnttab_claim_grant_reference(&xnfp->xnf_gref_rx_head); 1433 ASSERT((signed short)ref >= 0); 1434 bdesc->grant_ref = ref; 1435 if (xnfp->xnf_rx_hvcopy) { 1436 pfn_t pfn = xnf_btop(bdesc->buf_phys); 1437 mfn_t mfn = pfn_to_mfn(pfn); 1438 1439 gnttab_grant_foreign_access_ref(ref, oeid, mfn, 0); 1440 } else { 1441 gnttab_grant_foreign_transfer_ref(ref, oeid); 1442 } 1443 } 1444 reqp->id = hang_ix; 1445 reqp->gref = bdesc->grant_ref; 1446 bdesc->id = hang_ix; 1447 xnfp->xnf_rxpkt_bufptr[hang_ix] = bdesc; 1448 membar_producer(); 1449 xnfp->xnf_rx_ring.req_prod_pvt++; 1450 } 1451 1452 static mblk_t * 1453 xnf_process_hvcopy_recv(xnf_t *xnfp) 1454 { 1455 netif_rx_response_t *rxpkt; 1456 mblk_t *mp, *head, *tail; 1457 struct xnf_buffer_desc *bdesc; 1458 boolean_t hwcsum = B_FALSE, notify, work_to_do; 1459 size_t len; 1460 1461 /* 1462 * in loop over unconsumed responses, we do: 1463 * 1. get a response 1464 * 2. take corresponding buffer off recv. ring 1465 * 3. indicate this by setting slot to NULL 1466 * 4. create a new message and 1467 * 5. copy data in, adjust ptr 1468 * 1469 * outside loop: 1470 * 7. make sure no more data has arrived; kick HV 1471 */ 1472 1473 head = tail = NULL; 1474 1475 loop: 1476 while (RING_HAS_UNCONSUMED_RESPONSES(&xnfp->xnf_rx_ring)) { 1477 1478 /* 1. */ 1479 rxpkt = RING_GET_RESPONSE(&xnfp->xnf_rx_ring, 1480 xnfp->xnf_rx_ring.rsp_cons); 1481 1482 DTRACE_PROBE4(got_PKT, int, (int)rxpkt->id, int, 1483 (int)rxpkt->offset, 1484 int, (int)rxpkt->flags, int, (int)rxpkt->status); 1485 1486 /* 1487 * 2. 
1488 * Take buffer off of receive ring 1489 */ 1490 hwcsum = B_FALSE; 1491 bdesc = xnfp->xnf_rxpkt_bufptr[rxpkt->id]; 1492 /* 3 */ 1493 xnfp->xnf_rxpkt_bufptr[rxpkt->id] = NULL; 1494 ASSERT(bdesc->id == rxpkt->id); 1495 if (rxpkt->status <= 0) { 1496 DTRACE_PROBE4(pkt_status_negative, int, rxpkt->status, 1497 char *, bdesc->buf, int, rxpkt->offset, 1498 char *, ((char *)bdesc->buf) + rxpkt->offset); 1499 mp = NULL; 1500 xnfp->xnf_stat_errrx++; 1501 if (rxpkt->status == 0) 1502 xnfp->xnf_stat_runt++; 1503 if (rxpkt->status == NETIF_RSP_ERROR) 1504 xnfp->xnf_stat_mac_rcv_error++; 1505 if (rxpkt->status == NETIF_RSP_DROPPED) 1506 xnfp->xnf_stat_norxbuf++; 1507 /* 1508 * re-hang the buffer 1509 */ 1510 rx_buffer_hang(xnfp, bdesc); 1511 } else { 1512 grant_ref_t ref = bdesc->grant_ref; 1513 struct xnf_buffer_desc *new_bdesc; 1514 unsigned long off = rxpkt->offset; 1515 1516 DTRACE_PROBE4(pkt_status_ok, int, rxpkt->status, 1517 char *, bdesc->buf, int, rxpkt->offset, 1518 char *, ((char *)bdesc->buf) + rxpkt->offset); 1519 len = rxpkt->status; 1520 ASSERT(off + len <= PAGEOFFSET); 1521 if (ref == GRANT_INVALID_REF) { 1522 mp = NULL; 1523 new_bdesc = bdesc; 1524 cmn_err(CE_WARN, "Bad rx grant reference %d " 1525 "from dom %d", ref, 1526 xvdi_get_oeid(xnfp->xnf_devinfo)); 1527 goto luckless; 1528 } 1529 /* 1530 * Release ref which we'll be re-claiming in 1531 * rx_buffer_hang(). 1532 */ 1533 bdesc->grant_ref = GRANT_INVALID_REF; 1534 (void) gnttab_end_foreign_access_ref(ref, 0); 1535 gnttab_release_grant_reference(&xnfp->xnf_gref_rx_head, 1536 ref); 1537 if (rxpkt->flags & NETRXF_data_validated) 1538 hwcsum = B_TRUE; 1539 1540 /* 1541 * XXPV for the initial implementation of HVcopy, 1542 * create a new msg and copy in the data 1543 */ 1544 /* 4. */ 1545 if ((mp = allocb(len, BPRI_MED)) == NULL) { 1546 /* 1547 * Couldn't get buffer to copy to, 1548 * drop this data, and re-hang 1549 * the buffer on the ring. 1550 */ 1551 xnfp->xnf_stat_norxbuf++; 1552 DTRACE_PROBE(alloc_nix); 1553 } else { 1554 /* 5. */ 1555 DTRACE_PROBE(alloc_ok); 1556 bcopy(bdesc->buf + off, mp->b_wptr, 1557 len); 1558 mp->b_wptr += len; 1559 } 1560 new_bdesc = bdesc; 1561 luckless: 1562 1563 /* Re-hang old or hang new buffer. */ 1564 rx_buffer_hang(xnfp, new_bdesc); 1565 } 1566 if (mp) { 1567 if (hwcsum) { 1568 /* 1569 * See comments in xnf_process_recv(). 1570 */ 1571 1572 (void) hcksum_assoc(mp, NULL, 1573 NULL, 0, 0, 0, 0, 1574 HCK_FULLCKSUM | 1575 HCK_FULLCKSUM_OK, 1576 0); 1577 xnfp->xnf_stat_rx_cksum_no_need++; 1578 } 1579 if (head == NULL) { 1580 head = tail = mp; 1581 } else { 1582 tail->b_next = mp; 1583 tail = mp; 1584 } 1585 1586 ASSERT(mp->b_next == NULL); 1587 1588 xnfp->xnf_stat_ipackets++; 1589 xnfp->xnf_stat_rbytes += len; 1590 } 1591 1592 xnfp->xnf_rx_ring.rsp_cons++; 1593 1594 xnfp->xnf_stat_hvcopy_packet_processed++; 1595 } 1596 1597 /* 7. */ 1598 /* 1599 * Has more data come in since we started? 1600 */ 1601 /* LINTED: constant in conditional context */ 1602 RING_FINAL_CHECK_FOR_RESPONSES(&xnfp->xnf_rx_ring, work_to_do); 1603 if (work_to_do) 1604 goto loop; 1605 1606 /* 1607 * Indicate to the backend that we have re-filled the receive 1608 * ring. 
1609 */ 1610 /* LINTED: constant in conditional context */ 1611 RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&xnfp->xnf_rx_ring, notify); 1612 if (notify) 1613 ec_notify_via_evtchn(xnfp->xnf_evtchn); 1614 1615 return (head); 1616 } 1617 1618 /* Process all queued received packets */ 1619 static mblk_t * 1620 xnf_process_recv(xnf_t *xnfp) 1621 { 1622 volatile netif_rx_response_t *rxpkt; 1623 mblk_t *mp, *head, *tail; 1624 struct xnf_buffer_desc *bdesc; 1625 extern mblk_t *desballoc(unsigned char *, size_t, uint_t, frtn_t *); 1626 boolean_t hwcsum = B_FALSE, notify, work_to_do; 1627 size_t len; 1628 pfn_t pfn; 1629 long cnt; 1630 1631 head = tail = NULL; 1632 loop: 1633 while (RING_HAS_UNCONSUMED_RESPONSES(&xnfp->xnf_rx_ring)) { 1634 1635 rxpkt = RING_GET_RESPONSE(&xnfp->xnf_rx_ring, 1636 xnfp->xnf_rx_ring.rsp_cons); 1637 1638 /* 1639 * Take buffer off of receive ring 1640 */ 1641 hwcsum = B_FALSE; 1642 bdesc = xnfp->xnf_rxpkt_bufptr[rxpkt->id]; 1643 xnfp->xnf_rxpkt_bufptr[rxpkt->id] = NULL; 1644 ASSERT(bdesc->id == rxpkt->id); 1645 if (rxpkt->status <= 0) { 1646 mp = NULL; 1647 xnfp->xnf_stat_errrx++; 1648 if (rxpkt->status == 0) 1649 xnfp->xnf_stat_runt++; 1650 if (rxpkt->status == NETIF_RSP_ERROR) 1651 xnfp->xnf_stat_mac_rcv_error++; 1652 if (rxpkt->status == NETIF_RSP_DROPPED) 1653 xnfp->xnf_stat_norxbuf++; 1654 /* 1655 * re-hang the buffer 1656 */ 1657 rx_buffer_hang(xnfp, bdesc); 1658 } else { 1659 grant_ref_t ref = bdesc->grant_ref; 1660 struct xnf_buffer_desc *new_bdesc; 1661 unsigned long off = rxpkt->offset; 1662 unsigned long mfn; 1663 1664 len = rxpkt->status; 1665 ASSERT(off + len <= PAGEOFFSET); 1666 if (ref == GRANT_INVALID_REF) { 1667 mp = NULL; 1668 new_bdesc = bdesc; 1669 cmn_err(CE_WARN, "Bad rx grant reference %d " 1670 "from dom %d", ref, 1671 xvdi_get_oeid(xnfp->xnf_devinfo)); 1672 goto luckless; 1673 } 1674 bdesc->grant_ref = GRANT_INVALID_REF; 1675 mfn = gnttab_end_foreign_transfer_ref(ref); 1676 ASSERT(mfn != MFN_INVALID); 1677 ASSERT(hat_getpfnum(kas.a_hat, bdesc->buf) == 1678 PFN_INVALID); 1679 1680 gnttab_release_grant_reference(&xnfp->xnf_gref_rx_head, 1681 ref); 1682 reassign_pfn(xnf_btop(bdesc->buf_phys), mfn); 1683 hat_devload(kas.a_hat, bdesc->buf, PAGESIZE, 1684 xnf_btop(bdesc->buf_phys), 1685 PROT_READ | PROT_WRITE, HAT_LOAD); 1686 balloon_drv_added(1); 1687 1688 if (rxpkt->flags & NETRXF_data_validated) 1689 hwcsum = B_TRUE; 1690 if (len <= xnf_rx_bcopy_thresh) { 1691 /* 1692 * For small buffers, just copy the data 1693 * and send the copy upstream. 1694 */ 1695 new_bdesc = NULL; 1696 } else { 1697 /* 1698 * We send a pointer to this data upstream; 1699 * we need a new buffer to replace this one. 1700 */ 1701 mutex_enter(&xnfp->xnf_rx_buf_mutex); 1702 new_bdesc = xnf_get_buffer(xnfp); 1703 if (new_bdesc != NULL) { 1704 xnfp->xnf_rx_bufs_outstanding++; 1705 } else { 1706 xnfp->xnf_stat_rx_no_ringbuf++; 1707 } 1708 mutex_exit(&xnfp->xnf_rx_buf_mutex); 1709 } 1710 1711 if (new_bdesc == NULL) { 1712 /* 1713 * Don't have a new ring buffer; bcopy the data 1714 * from the buffer, and preserve the 1715 * original buffer 1716 */ 1717 if ((mp = allocb(len, BPRI_MED)) == NULL) { 1718 /* 1719 * Could't get buffer to copy to, 1720 * drop this data, and re-hang 1721 * the buffer on the ring. 
1722 */ 1723 xnfp->xnf_stat_norxbuf++; 1724 } else { 1725 bcopy(bdesc->buf + off, mp->b_wptr, 1726 len); 1727 } 1728 /* 1729 * Give the buffer page back to xen 1730 */ 1731 pfn = xnf_btop(bdesc->buf_phys); 1732 cnt = balloon_free_pages(1, &mfn, bdesc->buf, 1733 &pfn); 1734 if (cnt != 1) { 1735 cmn_err(CE_WARN, "unable to give a " 1736 "page back to the hypervisor\n"); 1737 } 1738 new_bdesc = bdesc; 1739 } else { 1740 if ((mp = desballoc((unsigned char *)bdesc->buf, 1741 off + len, 0, (frtn_t *)bdesc)) == NULL) { 1742 /* 1743 * Couldn't get mblk to pass recv data 1744 * up with, free the old ring buffer 1745 */ 1746 xnfp->xnf_stat_norxbuf++; 1747 xnf_rcv_complete(bdesc); 1748 goto luckless; 1749 } 1750 (void) ddi_dma_sync(bdesc->dma_handle, 1751 0, 0, DDI_DMA_SYNC_FORCPU); 1752 1753 mp->b_wptr += off; 1754 mp->b_rptr += off; 1755 } 1756 luckless: 1757 if (mp) 1758 mp->b_wptr += len; 1759 /* re-hang old or hang new buffer */ 1760 rx_buffer_hang(xnfp, new_bdesc); 1761 } 1762 if (mp) { 1763 if (hwcsum) { 1764 /* 1765 * If the peer says that the data has 1766 * been validated then we declare that 1767 * the full checksum has been 1768 * verified. 1769 * 1770 * We don't look at the "checksum 1771 * blank" flag, and hence could have a 1772 * packet here that we are asserting 1773 * is good with a blank checksum. 1774 * 1775 * The hardware checksum offload 1776 * specification says that we must 1777 * provide the actual checksum as well 1778 * as an assertion that it is valid, 1779 * but the protocol stack doesn't 1780 * actually use it and some other 1781 * drivers don't bother, so we don't. 1782 * If it was necessary we could grovel 1783 * in the packet to find it. 1784 */ 1785 1786 (void) hcksum_assoc(mp, NULL, 1787 NULL, 0, 0, 0, 0, 1788 HCK_FULLCKSUM | 1789 HCK_FULLCKSUM_OK, 1790 0); 1791 xnfp->xnf_stat_rx_cksum_no_need++; 1792 } 1793 if (head == NULL) { 1794 head = tail = mp; 1795 } else { 1796 tail->b_next = mp; 1797 tail = mp; 1798 } 1799 1800 ASSERT(mp->b_next == NULL); 1801 1802 xnfp->xnf_stat_ipackets++; 1803 xnfp->xnf_stat_rbytes += len; 1804 } 1805 1806 xnfp->xnf_rx_ring.rsp_cons++; 1807 } 1808 1809 /* 1810 * Has more data come in since we started? 1811 */ 1812 /* LINTED: constant in conditional context */ 1813 RING_FINAL_CHECK_FOR_RESPONSES(&xnfp->xnf_rx_ring, work_to_do); 1814 if (work_to_do) 1815 goto loop; 1816 1817 /* 1818 * Indicate to the backend that we have re-filled the receive 1819 * ring. 1820 */ 1821 /* LINTED: constant in conditional context */ 1822 RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&xnfp->xnf_rx_ring, notify); 1823 if (notify) 1824 ec_notify_via_evtchn(xnfp->xnf_evtchn); 1825 1826 return (head); 1827 } 1828 1829 /* Called when the upper layers free a message we passed upstream */ 1830 static void 1831 xnf_rcv_complete(struct xnf_buffer_desc *bdesc) 1832 { 1833 xnf_t *xnfp = bdesc->xnfp; 1834 pfn_t pfn; 1835 long cnt; 1836 1837 /* One less outstanding receive buffer */ 1838 mutex_enter(&xnfp->xnf_rx_buf_mutex); 1839 --xnfp->xnf_rx_bufs_outstanding; 1840 /* 1841 * Return buffer to the free list, unless the free list is getting 1842 * too large. XXPV - this threshold may need tuning. 1843 */ 1844 if (xnfp->xnf_rx_descs_free < xnf_rx_bufs_lowat) { 1845 /* 1846 * Unmap the page, and hand the machine page back 1847 * to xen so it can be re-used as a backend net buffer. 
1848 */ 1849 pfn = xnf_btop(bdesc->buf_phys); 1850 cnt = balloon_free_pages(1, NULL, bdesc->buf, &pfn); 1851 if (cnt != 1) { 1852 cmn_err(CE_WARN, "unable to give a page back to the " 1853 "hypervisor\n"); 1854 } 1855 1856 bdesc->next = xnfp->xnf_free_list; 1857 xnfp->xnf_free_list = bdesc; 1858 xnfp->xnf_rx_descs_free++; 1859 mutex_exit(&xnfp->xnf_rx_buf_mutex); 1860 } else { 1861 /* 1862 * We can return everything here since we have a free buffer 1863 * that we have not given the backing page for back to xen. 1864 */ 1865 --xnfp->xnf_rx_buffer_count; 1866 mutex_exit(&xnfp->xnf_rx_buf_mutex); 1867 (void) ddi_dma_unbind_handle(bdesc->dma_handle); 1868 ddi_dma_mem_free(&bdesc->acc_handle); 1869 ddi_dma_free_handle(&bdesc->dma_handle); 1870 kmem_free(bdesc, sizeof (*bdesc)); 1871 } 1872 } 1873 1874 /* 1875 * xnf_alloc_dma_resources() -- initialize the drivers structures 1876 */ 1877 static int 1878 xnf_alloc_dma_resources(xnf_t *xnfp) 1879 { 1880 dev_info_t *devinfo = xnfp->xnf_devinfo; 1881 int i; 1882 size_t len; 1883 ddi_dma_cookie_t dma_cookie; 1884 uint_t ncookies; 1885 struct xnf_buffer_desc *bdesc; 1886 int rc; 1887 caddr_t rptr; 1888 1889 xnfp->xnf_n_rx = NET_RX_RING_SIZE; 1890 xnfp->xnf_max_rx_bufs = xnf_rx_bufs_hiwat; 1891 1892 xnfp->xnf_n_tx = NET_TX_RING_SIZE; 1893 1894 /* 1895 * The code below allocates all the DMA data structures that 1896 * need to be released when the driver is detached. 1897 * 1898 * First allocate handles for mapping (virtual address) pointers to 1899 * transmit data buffers to physical addresses 1900 */ 1901 for (i = 0; i < xnfp->xnf_n_tx; i++) { 1902 if ((rc = ddi_dma_alloc_handle(devinfo, 1903 &tx_buffer_dma_attr, DDI_DMA_SLEEP, 0, 1904 &xnfp->xnf_tx_pkt_info[i].dma_handle)) != DDI_SUCCESS) 1905 return (DDI_FAILURE); 1906 } 1907 1908 /* 1909 * Allocate page for the transmit descriptor ring. 1910 */ 1911 if (ddi_dma_alloc_handle(devinfo, &ringbuf_dma_attr, 1912 DDI_DMA_SLEEP, 0, &xnfp->xnf_tx_ring_dma_handle) != DDI_SUCCESS) 1913 goto alloc_error; 1914 1915 if (ddi_dma_mem_alloc(xnfp->xnf_tx_ring_dma_handle, 1916 PAGESIZE, &accattr, DDI_DMA_CONSISTENT, 1917 DDI_DMA_SLEEP, 0, &rptr, &len, 1918 &xnfp->xnf_tx_ring_dma_acchandle) != DDI_SUCCESS) { 1919 ddi_dma_free_handle(&xnfp->xnf_tx_ring_dma_handle); 1920 xnfp->xnf_tx_ring_dma_handle = NULL; 1921 goto alloc_error; 1922 } 1923 1924 if ((rc = ddi_dma_addr_bind_handle(xnfp->xnf_tx_ring_dma_handle, NULL, 1925 rptr, PAGESIZE, DDI_DMA_RDWR | DDI_DMA_CONSISTENT, 1926 DDI_DMA_SLEEP, 0, &dma_cookie, &ncookies)) != DDI_DMA_MAPPED) { 1927 ddi_dma_mem_free(&xnfp->xnf_tx_ring_dma_acchandle); 1928 ddi_dma_free_handle(&xnfp->xnf_tx_ring_dma_handle); 1929 xnfp->xnf_tx_ring_dma_handle = NULL; 1930 xnfp->xnf_tx_ring_dma_acchandle = NULL; 1931 if (rc == DDI_DMA_NORESOURCES) 1932 goto alloc_error; 1933 else 1934 goto error; 1935 } 1936 1937 ASSERT(ncookies == 1); 1938 bzero(rptr, PAGESIZE); 1939 /* LINTED: constant in conditional context */ 1940 SHARED_RING_INIT((netif_tx_sring_t *)rptr); 1941 /* LINTED: constant in conditional context */ 1942 FRONT_RING_INIT(&xnfp->xnf_tx_ring, (netif_tx_sring_t *)rptr, PAGESIZE); 1943 xnfp->xnf_tx_ring_phys_addr = dma_cookie.dmac_laddress; 1944 1945 /* 1946 * Allocate page for the receive descriptor ring. 
1947 */ 1948 if (ddi_dma_alloc_handle(devinfo, &ringbuf_dma_attr, 1949 DDI_DMA_SLEEP, 0, &xnfp->xnf_rx_ring_dma_handle) != DDI_SUCCESS) 1950 goto alloc_error; 1951 1952 if (ddi_dma_mem_alloc(xnfp->xnf_rx_ring_dma_handle, 1953 PAGESIZE, &accattr, DDI_DMA_CONSISTENT, 1954 DDI_DMA_SLEEP, 0, &rptr, &len, 1955 &xnfp->xnf_rx_ring_dma_acchandle) != DDI_SUCCESS) { 1956 ddi_dma_free_handle(&xnfp->xnf_rx_ring_dma_handle); 1957 xnfp->xnf_rx_ring_dma_handle = NULL; 1958 goto alloc_error; 1959 } 1960 1961 if ((rc = ddi_dma_addr_bind_handle(xnfp->xnf_rx_ring_dma_handle, NULL, 1962 rptr, PAGESIZE, DDI_DMA_RDWR | DDI_DMA_CONSISTENT, 1963 DDI_DMA_SLEEP, 0, &dma_cookie, &ncookies)) != DDI_DMA_MAPPED) { 1964 ddi_dma_mem_free(&xnfp->xnf_rx_ring_dma_acchandle); 1965 ddi_dma_free_handle(&xnfp->xnf_rx_ring_dma_handle); 1966 xnfp->xnf_rx_ring_dma_handle = NULL; 1967 xnfp->xnf_rx_ring_dma_acchandle = NULL; 1968 if (rc == DDI_DMA_NORESOURCES) 1969 goto alloc_error; 1970 else 1971 goto error; 1972 } 1973 1974 ASSERT(ncookies == 1); 1975 bzero(rptr, PAGESIZE); 1976 /* LINTED: constant in conditional context */ 1977 SHARED_RING_INIT((netif_rx_sring_t *)rptr); 1978 /* LINTED: constant in conditional context */ 1979 FRONT_RING_INIT(&xnfp->xnf_rx_ring, (netif_rx_sring_t *)rptr, PAGESIZE); 1980 xnfp->xnf_rx_ring_phys_addr = dma_cookie.dmac_laddress; 1981 1982 /* 1983 * Preallocate receive buffers for each receive descriptor. 1984 */ 1985 1986 /* Set up the "free list" of receive buffer descriptors */ 1987 for (i = 0; i < xnfp->xnf_n_rx; i++) { 1988 if ((bdesc = xnf_alloc_buffer(xnfp)) == NULL) 1989 goto alloc_error; 1990 bdesc->next = xnfp->xnf_free_list; 1991 xnfp->xnf_free_list = bdesc; 1992 } 1993 1994 return (DDI_SUCCESS); 1995 1996 alloc_error: 1997 cmn_err(CE_WARN, "xnf%d: could not allocate enough DMA memory", 1998 ddi_get_instance(xnfp->xnf_devinfo)); 1999 error: 2000 xnf_release_dma_resources(xnfp); 2001 return (DDI_FAILURE); 2002 } 2003 2004 /* 2005 * Release all DMA resources in the opposite order from acquisition 2006 * Should not be called until all outstanding esballoc buffers 2007 * have been returned. 

/*
 * Release all DMA resources in the opposite order from acquisition.
 * Should not be called until all outstanding esballoc buffers
 * have been returned.
 */
static void
xnf_release_dma_resources(xnf_t *xnfp)
{
	int i;

	/*
	 * Free receive buffers which are currently associated with
	 * descriptors.
	 */
	for (i = 0; i < xnfp->xnf_n_rx; i++) {
		struct xnf_buffer_desc *bp;

		if ((bp = xnfp->xnf_rxpkt_bufptr[i]) == NULL)
			continue;
		xnf_free_buffer(bp);
		xnfp->xnf_rxpkt_bufptr[i] = NULL;
	}

	/* Free the receive ring buffer */
	if (xnfp->xnf_rx_ring_dma_acchandle != NULL) {
		(void) ddi_dma_unbind_handle(xnfp->xnf_rx_ring_dma_handle);
		ddi_dma_mem_free(&xnfp->xnf_rx_ring_dma_acchandle);
		ddi_dma_free_handle(&xnfp->xnf_rx_ring_dma_handle);
		xnfp->xnf_rx_ring_dma_acchandle = NULL;
	}
	/* Free the transmit ring buffer */
	if (xnfp->xnf_tx_ring_dma_acchandle != NULL) {
		(void) ddi_dma_unbind_handle(xnfp->xnf_tx_ring_dma_handle);
		ddi_dma_mem_free(&xnfp->xnf_tx_ring_dma_acchandle);
		ddi_dma_free_handle(&xnfp->xnf_tx_ring_dma_handle);
		xnfp->xnf_tx_ring_dma_acchandle = NULL;
	}
}

static void
xnf_release_mblks(xnf_t *xnfp)
{
	int i;

	for (i = 0; i < xnfp->xnf_n_tx; i++) {
		if (xnfp->xnf_tx_pkt_info[i].mp == NULL)
			continue;
		freemsg(xnfp->xnf_tx_pkt_info[i].mp);
		xnfp->xnf_tx_pkt_info[i].mp = NULL;
		(void) ddi_dma_unbind_handle(
		    xnfp->xnf_tx_pkt_info[i].dma_handle);
	}
}

/*
 * Remove a xmit buffer descriptor from the head of the free list and return
 * a pointer to it. If no buffers on list, attempt to allocate a new one.
 * Called with the tx_buf_mutex held.
 */
static struct xnf_buffer_desc *
xnf_get_tx_buffer(xnf_t *xnfp)
{
	struct xnf_buffer_desc *bdesc;

	bdesc = xnfp->xnf_tx_free_list;
	if (bdesc != NULL) {
		xnfp->xnf_tx_free_list = bdesc->next;
	} else {
		bdesc = xnf_alloc_tx_buffer(xnfp);
	}
	return (bdesc);
}

/*
 * Remove a buffer descriptor from the head of the free list and return
 * a pointer to it. If no buffers on list, attempt to allocate a new one.
 * Called with the rx_buf_mutex held.
 */
static struct xnf_buffer_desc *
xnf_get_buffer(xnf_t *xnfp)
{
	struct xnf_buffer_desc *bdesc;

	bdesc = xnfp->xnf_free_list;
	if (bdesc != NULL) {
		xnfp->xnf_free_list = bdesc->next;
		xnfp->xnf_rx_descs_free--;
	} else {
		bdesc = xnf_alloc_buffer(xnfp);
	}
	return (bdesc);
}

/*
 * Free a xmit buffer back to the xmit free list.
 */
static void
xnf_free_tx_buffer(struct xnf_buffer_desc *bp)
{
	xnf_t *xnfp = bp->xnfp;

	mutex_enter(&xnfp->xnf_tx_buf_mutex);
	bp->next = xnfp->xnf_tx_free_list;
	xnfp->xnf_tx_free_list = bp;
	mutex_exit(&xnfp->xnf_tx_buf_mutex);
}
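
/*
 * Editorial sketch: the receive path loans these buffers upstream rather
 * than copying every packet.  Assuming the standard STREAMS desballoc(9F)
 * interface, wrapping a buffer looks roughly like the hypothetical
 * fragment below; when the stack later frees the mblk, the free_func
 * registered in bdesc->free_rtn (xnf_rcv_complete() or
 * xnf_copy_rcv_complete(), set up in xnf_alloc_buffer() below) runs and
 * returns the buffer to the driver.
 */
#if 0	/* illustrative example only, not compiled into the driver */
static mblk_t *
xnf_example_loan_buffer(struct xnf_buffer_desc *bdesc, size_t len)
{
	mblk_t *mp;

	/* Wrap the DMA buffer in an mblk with a driver free routine. */
	mp = desballoc((unsigned char *)bdesc->buf, len, 0,
	    &bdesc->free_rtn);
	if (mp == NULL)
		return (NULL);	/* caller would fall back to bcopy */

	mp->b_wptr = mp->b_rptr + len;
	return (mp);
}
#endif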

/*
 * Put a buffer descriptor onto the head of the free list.
 * for page-flip:
 * We can't really free these buffers back to the kernel
 * since we have given away their backing page to be used
 * by the back end net driver.
 * for hvcopy:
 * release all the memory
 */
static void
xnf_free_buffer(struct xnf_buffer_desc *bdesc)
{
	xnf_t *xnfp = bdesc->xnfp;

	mutex_enter(&xnfp->xnf_rx_buf_mutex);
	if (xnfp->xnf_rx_hvcopy) {
		if (ddi_dma_unbind_handle(bdesc->dma_handle) != DDI_SUCCESS)
			goto out;
		ddi_dma_mem_free(&bdesc->acc_handle);
		ddi_dma_free_handle(&bdesc->dma_handle);
		kmem_free(bdesc, sizeof (*bdesc));
		xnfp->xnf_rx_buffer_count--;
	} else {
		bdesc->next = xnfp->xnf_free_list;
		xnfp->xnf_free_list = bdesc;
		xnfp->xnf_rx_descs_free++;
	}
out:
	mutex_exit(&xnfp->xnf_rx_buf_mutex);
}

/*
 * Allocate a DMA-able xmit buffer, including a structure to
 * keep track of the buffer. Called with tx_buf_mutex held.
 */
static struct xnf_buffer_desc *
xnf_alloc_tx_buffer(xnf_t *xnfp)
{
	struct xnf_buffer_desc *bdesc;
	size_t len;

	if ((bdesc = kmem_zalloc(sizeof (*bdesc), KM_NOSLEEP)) == NULL)
		return (NULL);

	/* Allocate a DMA access handle for the transmit buffer */
	if (ddi_dma_alloc_handle(xnfp->xnf_devinfo, &tx_buffer_dma_attr,
	    0, 0, &bdesc->dma_handle) != DDI_SUCCESS)
		goto failure;

	/* Allocate DMA-able memory for transmit buffer */
	if (ddi_dma_mem_alloc(bdesc->dma_handle,
	    PAGESIZE, &data_accattr, DDI_DMA_STREAMING, 0, 0,
	    &bdesc->buf, &len, &bdesc->acc_handle) != DDI_SUCCESS)
		goto failure_1;

	bdesc->xnfp = xnfp;
	xnfp->xnf_tx_buffer_count++;

	return (bdesc);

failure_1:
	ddi_dma_free_handle(&bdesc->dma_handle);

failure:
	kmem_free(bdesc, sizeof (*bdesc));
	return (NULL);
}
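
/*
 * Editorial sketch: the page-sized buffers allocated above serve as
 * bounce buffers when an outbound message is too fragmented, or crosses
 * a page boundary, to be handed to the backend directly -- compare the
 * tx_pullup and tx_pagebndry statistics.  Flattening an mblk chain into
 * such a buffer is roughly the hypothetical loop below (the caller is
 * assumed to have checked that the total length fits in PAGESIZE).
 */
#if 0	/* illustrative example only, not compiled into the driver */
static size_t
xnf_example_pullup(struct xnf_buffer_desc *bdesc, mblk_t *mp)
{
	caddr_t bp = bdesc->buf;
	size_t total = 0;

	/* Copy each fragment of the chain into the contiguous buffer. */
	for (; mp != NULL; mp = mp->b_cont) {
		size_t len = MBLKL(mp);

		bcopy(mp->b_rptr, bp, len);
		bp += len;
		total += len;
	}
	return (total);
}
#endif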

/*
 * Allocate a DMA-able receive buffer, including a structure to
 * keep track of the buffer. Called with rx_buf_mutex held.
 */
static struct xnf_buffer_desc *
xnf_alloc_buffer(xnf_t *xnfp)
{
	struct xnf_buffer_desc *bdesc;
	size_t len;
	uint_t ncookies;
	ddi_dma_cookie_t dma_cookie;
	long cnt;
	pfn_t pfn;

	if (xnfp->xnf_rx_buffer_count >= xnfp->xnf_max_rx_bufs)
		return (NULL);

	if ((bdesc = kmem_zalloc(sizeof (*bdesc), KM_NOSLEEP)) == NULL)
		return (NULL);

	/* allocate a DMA access handle for receive buffer */
	if (ddi_dma_alloc_handle(xnfp->xnf_devinfo, &rx_buffer_dma_attr,
	    0, 0, &bdesc->dma_handle) != DDI_SUCCESS)
		goto failure;

	/* Allocate DMA-able memory for receive buffer */
	if (ddi_dma_mem_alloc(bdesc->dma_handle,
	    PAGESIZE, &data_accattr, DDI_DMA_STREAMING, 0, 0,
	    &bdesc->buf, &len, &bdesc->acc_handle) != DDI_SUCCESS)
		goto failure_1;

	/* bind to virtual address of buffer to get physical address */
	if (ddi_dma_addr_bind_handle(bdesc->dma_handle, NULL,
	    bdesc->buf, PAGESIZE, DDI_DMA_READ | DDI_DMA_STREAMING,
	    DDI_DMA_SLEEP, 0, &dma_cookie, &ncookies) != DDI_DMA_MAPPED)
		goto failure_2;

	bdesc->buf_phys = dma_cookie.dmac_laddress;
	bdesc->xnfp = xnfp;
	if (xnfp->xnf_rx_hvcopy) {
		bdesc->free_rtn.free_func = xnf_copy_rcv_complete;
	} else {
		bdesc->free_rtn.free_func = xnf_rcv_complete;
	}
	bdesc->free_rtn.free_arg = (char *)bdesc;
	bdesc->grant_ref = GRANT_INVALID_REF;
	ASSERT(ncookies == 1);

	xnfp->xnf_rx_buffer_count++;

	if (!xnfp->xnf_rx_hvcopy) {
		/*
		 * Unmap the page, and hand the machine page back
		 * to xen so it can be used as a backend net buffer.
		 */
		pfn = xnf_btop(bdesc->buf_phys);
		cnt = balloon_free_pages(1, NULL, bdesc->buf, &pfn);
		if (cnt != 1) {
			cmn_err(CE_WARN, "unable to give a page back to the "
			    "hypervisor\n");
		}
	}

	return (bdesc);

failure_2:
	ddi_dma_mem_free(&bdesc->acc_handle);

failure_1:
	ddi_dma_free_handle(&bdesc->dma_handle);

failure:
	kmem_free(bdesc, sizeof (*bdesc));
	return (NULL);
}

/*
 * Statistics.
 */
static char *xnf_aux_statistics[] = {
	"tx_cksum_deferred",
	"rx_cksum_no_need",
	"interrupts",
	"unclaimed_interrupts",
	"tx_pullup",
	"tx_pagebndry",
	"tx_attempt",
	"rx_no_ringbuf",
	"hvcopy_packet_processed",
};
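
/*
 * Editorial sketch: xnf_kstat_aux_update() below relies on its assignment
 * order matching xnf_aux_statistics[] exactly (see the comment in that
 * function).  A common way to make that coupling explicit -- shown purely
 * as a hypothetical alternative, not what this driver does -- is to index
 * both the name table and the kstat data through a single enum:
 */
#if 0	/* illustrative example only, not compiled into the driver */
enum xnf_example_aux_stat {
	XNF_EX_TX_CKSUM_DEFERRED = 0,
	XNF_EX_RX_CKSUM_NO_NEED,
	XNF_EX_NSTAT			/* must be last */
};

static void
xnf_example_aux_update(kstat_named_t *knp, xnf_t *xnfp)
{
	/* Assign by index rather than by position in the code. */
	knp[XNF_EX_TX_CKSUM_DEFERRED].value.ui64 =
	    xnfp->xnf_stat_tx_cksum_deferred;
	knp[XNF_EX_RX_CKSUM_NO_NEED].value.ui64 =
	    xnfp->xnf_stat_rx_cksum_no_need;
}
#endif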

static int
xnf_kstat_aux_update(kstat_t *ksp, int flag)
{
	xnf_t *xnfp;
	kstat_named_t *knp;

	if (flag != KSTAT_READ)
		return (EACCES);

	xnfp = ksp->ks_private;
	knp = ksp->ks_data;

	/*
	 * Assignment order must match that of the names in
	 * xnf_aux_statistics.
	 */
	(knp++)->value.ui64 = xnfp->xnf_stat_tx_cksum_deferred;
	(knp++)->value.ui64 = xnfp->xnf_stat_rx_cksum_no_need;

	(knp++)->value.ui64 = xnfp->xnf_stat_interrupts;
	(knp++)->value.ui64 = xnfp->xnf_stat_unclaimed_interrupts;
	(knp++)->value.ui64 = xnfp->xnf_stat_tx_pullup;
	(knp++)->value.ui64 = xnfp->xnf_stat_tx_pagebndry;
	(knp++)->value.ui64 = xnfp->xnf_stat_tx_attempt;
	(knp++)->value.ui64 = xnfp->xnf_stat_rx_no_ringbuf;

	(knp++)->value.ui64 = xnfp->xnf_stat_hvcopy_packet_processed;

	return (0);
}

static boolean_t
xnf_kstat_init(xnf_t *xnfp)
{
	int nstat = sizeof (xnf_aux_statistics) /
	    sizeof (xnf_aux_statistics[0]);
	char **cp = xnf_aux_statistics;
	kstat_named_t *knp;

	/*
	 * Create and initialise kstats.
	 */
	if ((xnfp->xnf_kstat_aux = kstat_create("xnf",
	    ddi_get_instance(xnfp->xnf_devinfo),
	    "aux_statistics", "net", KSTAT_TYPE_NAMED,
	    nstat, 0)) == NULL)
		return (B_FALSE);

	xnfp->xnf_kstat_aux->ks_private = xnfp;
	xnfp->xnf_kstat_aux->ks_update = xnf_kstat_aux_update;

	knp = xnfp->xnf_kstat_aux->ks_data;
	while (nstat > 0) {
		kstat_named_init(knp, *cp, KSTAT_DATA_UINT64);

		knp++;
		cp++;
		nstat--;
	}

	kstat_install(xnfp->xnf_kstat_aux);

	return (B_TRUE);
}

static int
xnf_stat(void *arg, uint_t stat, uint64_t *val)
{
	xnf_t *xnfp = arg;

	mutex_enter(&xnfp->xnf_intrlock);
	mutex_enter(&xnfp->xnf_txlock);

#define	mac_stat(q, r)				\
	case (MAC_STAT_##q):			\
		*val = xnfp->xnf_stat_##r;	\
		break

#define	ether_stat(q, r)			\
	case (ETHER_STAT_##q):			\
		*val = xnfp->xnf_stat_##r;	\
		break

	switch (stat) {

	mac_stat(IPACKETS, ipackets);
	mac_stat(OPACKETS, opackets);
	mac_stat(RBYTES, rbytes);
	mac_stat(OBYTES, obytes);
	mac_stat(NORCVBUF, norxbuf);
	mac_stat(IERRORS, errrx);
	mac_stat(NOXMTBUF, tx_defer);

	ether_stat(MACRCV_ERRORS, mac_rcv_error);
	ether_stat(TOOSHORT_ERRORS, runt);

	default:
		mutex_exit(&xnfp->xnf_txlock);
		mutex_exit(&xnfp->xnf_intrlock);

		return (ENOTSUP);
	}

#undef mac_stat
#undef ether_stat

	mutex_exit(&xnfp->xnf_txlock);
	mutex_exit(&xnfp->xnf_intrlock);

	return (0);
}
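
/*
 * Editorial note: the named kstats created in xnf_kstat_init() above are
 * visible from userland, e.g. "kstat -m xnf -n aux_statistics", or
 * programmatically through libkstat as in the hypothetical standalone
 * program below (userland code, shown only to illustrate how the
 * statistics are consumed; it is not part of the driver).
 */
#if 0	/* illustrative userland example, not compiled into the driver */
#include <kstat.h>
#include <stdio.h>

int
main(void)
{
	kstat_ctl_t *kc = kstat_open();
	kstat_t *ksp;
	kstat_named_t *kn;

	if (kc == NULL)
		return (1);

	/* Instance 0 of the xnf driver's auxiliary statistics. */
	ksp = kstat_lookup(kc, "xnf", 0, "aux_statistics");
	if (ksp != NULL && kstat_read(kc, ksp, NULL) != -1 &&
	    (kn = kstat_data_lookup(ksp, "interrupts")) != NULL)
		(void) printf("interrupts: %llu\n",
		    (unsigned long long)kn->value.ui64);

	(void) kstat_close(kc);
	return (0);
}
#endif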

/*ARGSUSED*/
static void
xnf_blank(void *arg, time_t ticks, uint_t count)
{
	/*
	 * XXPV dme: blanking is not currently implemented.
	 *
	 * It's not obvious how to use the 'ticks' argument here.
	 *
	 * 'Count' might be used as an indicator of how to set
	 * rsp_event when posting receive buffers to the rx_ring. It
	 * would replace the code at the tail of xnf_process_recv()
	 * that simply indicates that the next completed packet should
	 * cause an interrupt.
	 */
}

static void
xnf_resources(void *arg)
{
	xnf_t *xnfp = arg;
	mac_rx_fifo_t mrf;

	mrf.mrf_type = MAC_RX_FIFO;
	mrf.mrf_blank = xnf_blank;
	mrf.mrf_arg = (void *)xnfp;
	mrf.mrf_normal_blank_time = 128;	/* XXPV dme: see xnf_blank() */
	mrf.mrf_normal_pkt_count = 8;		/* XXPV dme: see xnf_blank() */

	xnfp->xnf_rx_handle = mac_resource_add(xnfp->xnf_mh,
	    (mac_resource_t *)&mrf);
}

/*ARGSUSED*/
static void
xnf_ioctl(void *arg, queue_t *q, mblk_t *mp)
{
	miocnak(q, mp, 0, EINVAL);
}

static boolean_t
xnf_getcapab(void *arg, mac_capab_t cap, void *cap_data)
{
	xnf_t *xnfp = arg;

	switch (cap) {
	case MAC_CAPAB_HCKSUM: {
		uint32_t *capab = cap_data;

		/*
		 * We declare ourselves capable of HCKSUM_INET_PARTIAL
		 * so that the protocol stack inserts the pseudo-header
		 * checksum in packets that it passes down to us.
		 *
		 * Whilst the flag used to communicate with dom0 is
		 * called "NETTXF_csum_blank", the checksum in the
		 * packet must contain the pseudo-header checksum and
		 * not zero. (In fact, a Solaris dom0 is happy to deal
		 * with a checksum of zero, but a Linux dom0 is not.)
		 */
		if (xnfp->xnf_cksum_offload)
			*capab = HCKSUM_INET_PARTIAL;
		else
			*capab = 0;
		break;
	}

	case MAC_CAPAB_POLL:
		/* Just return B_TRUE. */
		break;

	default:
		return (B_FALSE);
	}

	return (B_TRUE);
}

/*ARGSUSED*/
static void
oe_state_change(dev_info_t *dip, ddi_eventcookie_t id,
    void *arg, void *impl_data)
{
	xnf_t *xnfp = ddi_get_driver_private(dip);
	XenbusState new_state = *(XenbusState *)impl_data;

	ASSERT(xnfp != NULL);

	switch (new_state) {
	case XenbusStateConnected:
		mutex_enter(&xnfp->xnf_intrlock);
		mutex_enter(&xnfp->xnf_txlock);

		xnfp->xnf_connected = B_TRUE;
		cv_broadcast(&xnfp->xnf_cv);

		mutex_exit(&xnfp->xnf_txlock);
		mutex_exit(&xnfp->xnf_intrlock);

		ec_notify_via_evtchn(xnfp->xnf_evtchn);
		break;

	default:
		break;
	}
}

/*
 * Check whether the backend is capable of, and willing to, talk
 * to us via hypervisor copy, as opposed to page flip.
 */
static boolean_t
xnf_hvcopy_peer_status(dev_info_t *devinfo)
{
	int be_rx_copy;
	int err;

	err = xenbus_scanf(XBT_NULL, xvdi_get_oename(devinfo),
	    "feature-rx-copy", "%d", &be_rx_copy);
	/*
	 * If we fail to read the store we assume that the key is
	 * absent, implying an older domain at the far end. Older
	 * domains cannot do HV copy (we assume).
	 */
	if (err != 0)
		be_rx_copy = 0;

	return (be_rx_copy ? B_TRUE : B_FALSE);
}
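
/*
 * Editorial sketch: reading "feature-rx-copy" above is only half of the
 * negotiation -- a frontend that wants hypervisor copy also advertises
 * that preference in its own xenstore directory for the backend to read.
 * The hypothetical fragment below assumes the xenbus_printf() and
 * xvdi_get_xsname() interfaces used elsewhere in the Xen support code;
 * the node name and the point at which the write happens (the driver
 * does this during ring setup, not here) are shown for illustration only.
 */
#if 0	/* illustrative example only, not compiled into the driver */
static int
xnf_example_request_rx_copy(dev_info_t *devinfo, boolean_t want_copy)
{
	/* Record our preference where the backend will look for it. */
	return (xenbus_printf(XBT_NULL, xvdi_get_xsname(devinfo),
	    "request-rx-copy", "%d", want_copy ? 1 : 0));
}
#endif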