1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 /* 30 * 31 * Copyright (c) 2004 Christian Limpach. 32 * All rights reserved. 33 * 34 * Redistribution and use in source and binary forms, with or without 35 * modification, are permitted provided that the following conditions 36 * are met: 37 * 1. Redistributions of source code must retain the above copyright 38 * notice, this list of conditions and the following disclaimer. 39 * 2. Redistributions in binary form must reproduce the above copyright 40 * notice, this list of conditions and the following disclaimer in the 41 * documentation and/or other materials provided with the distribution. 42 * 3. This section intentionally left blank. 43 * 4. The name of the author may not be used to endorse or promote products 44 * derived from this software without specific prior written permission. 45 * 46 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 47 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 48 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 49 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 50 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 51 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 52 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 53 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 54 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 55 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 56 */ 57 /* 58 * Section 3 of the above license was updated in response to bug 6379571. 
59 */ 60 61 /* 62 * xnf.c - Nemo-based network driver for domU 63 */ 64 65 #include <sys/types.h> 66 #include <sys/errno.h> 67 #include <sys/param.h> 68 #include <sys/sysmacros.h> 69 #include <sys/systm.h> 70 #include <sys/stream.h> 71 #include <sys/strsubr.h> 72 #include <sys/conf.h> 73 #include <sys/ddi.h> 74 #include <sys/devops.h> 75 #include <sys/sunddi.h> 76 #include <sys/sunndi.h> 77 #include <sys/dlpi.h> 78 #include <sys/ethernet.h> 79 #include <sys/strsun.h> 80 #include <sys/pattr.h> 81 #include <inet/ip.h> 82 #include <sys/modctl.h> 83 #include <sys/mac.h> 84 #include <sys/mac_ether.h> 85 #include <sys/bootinfo.h> 86 #include <sys/mach_mmu.h> 87 #ifdef XPV_HVM_DRIVER 88 #include <sys/xpv_support.h> 89 #include <sys/hypervisor.h> 90 #else 91 #include <sys/hypervisor.h> 92 #include <sys/evtchn_impl.h> 93 #include <sys/balloon_impl.h> 94 #endif 95 #include <xen/public/io/netif.h> 96 #include <sys/gnttab.h> 97 #include <xen/sys/xendev.h> 98 #include <sys/sdt.h> 99 100 #include <io/xnf.h> 101 102 103 /* 104 * Declarations and Module Linkage 105 */ 106 107 #define IDENT "Virtual Ethernet driver" 108 109 #if defined(DEBUG) || defined(__lint) 110 #define XNF_DEBUG 111 int xnfdebug = 0; 112 #endif 113 114 /* 115 * On a 32 bit PAE system physical and machine addresses are larger 116 * than 32 bits. ddi_btop() on such systems takes an unsigned long 117 * argument, and so addresses above 4G are truncated before ddi_btop() 118 * gets to see them. To avoid this, code the shift operation here. 119 */ 120 #define xnf_btop(addr) ((addr) >> PAGESHIFT) 121 122 boolean_t xnf_cksum_offload = B_TRUE; 123 124 /* Default value for hypervisor-based copy operations */ 125 boolean_t xnf_rx_hvcopy = B_TRUE; 126 127 /* 128 * Should pages used for transmit be readonly for the peer? 129 */ 130 boolean_t xnf_tx_pages_readonly = B_FALSE; 131 /* 132 * Packets under this size are bcopied instead of using desballoc. 133 * Choose a value > XNF_FRAMESIZE (1514) to force the receive path to 134 * always copy.
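 *
 * As a purely illustrative example (not something the driver ships
 * with), the threshold could be raised from /etc/system so that the
 * receive path always copies:
 *
 *	set xnf:xnf_rx_bcopy_thresh = 0x600
 *
 * 0x600 (1536) is larger than XNF_FRAMESIZE, so no received frame
 * would be loaned upstream via desballoc().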
135 */ 136 unsigned int xnf_rx_bcopy_thresh = 64; 137 138 unsigned int xnf_max_tx_frags = 1; 139 140 /* Required system entry points */ 141 static int xnf_attach(dev_info_t *, ddi_attach_cmd_t); 142 static int xnf_detach(dev_info_t *, ddi_detach_cmd_t); 143 144 /* Required driver entry points for Nemo */ 145 static int xnf_start(void *); 146 static void xnf_stop(void *); 147 static int xnf_set_mac_addr(void *, const uint8_t *); 148 static int xnf_set_multicast(void *, boolean_t, const uint8_t *); 149 static int xnf_set_promiscuous(void *, boolean_t); 150 static mblk_t *xnf_send(void *, mblk_t *); 151 static uint_t xnf_intr(caddr_t); 152 static int xnf_stat(void *, uint_t, uint64_t *); 153 static void xnf_blank(void *, time_t, uint_t); 154 static void xnf_resources(void *); 155 static void xnf_ioctl(void *, queue_t *, mblk_t *); 156 static boolean_t xnf_getcapab(void *, mac_capab_t, void *); 157 158 /* Driver private functions */ 159 static int xnf_alloc_dma_resources(xnf_t *); 160 static void xnf_release_dma_resources(xnf_t *); 161 static mblk_t *xnf_process_recv(xnf_t *); 162 static void xnf_rcv_complete(struct xnf_buffer_desc *); 163 static void xnf_release_mblks(xnf_t *); 164 static struct xnf_buffer_desc *xnf_alloc_tx_buffer(xnf_t *); 165 static struct xnf_buffer_desc *xnf_alloc_buffer(xnf_t *); 166 static struct xnf_buffer_desc *xnf_get_tx_buffer(xnf_t *); 167 static struct xnf_buffer_desc *xnf_get_buffer(xnf_t *); 168 static void xnf_free_buffer(struct xnf_buffer_desc *); 169 static void xnf_free_tx_buffer(struct xnf_buffer_desc *); 170 void xnf_send_driver_status(int, int); 171 static void rx_buffer_hang(xnf_t *, struct xnf_buffer_desc *); 172 static int xnf_clean_tx_ring(xnf_t *); 173 static void oe_state_change(dev_info_t *, ddi_eventcookie_t, 174 void *, void *); 175 static mblk_t *xnf_process_hvcopy_recv(xnf_t *xnfp); 176 static boolean_t xnf_hvcopy_peer_status(dev_info_t *devinfo); 177 static boolean_t xnf_kstat_init(xnf_t *xnfp); 178 179 /* 180 * XXPV dme: remove MC_IOCTL? 
181 */ 182 static mac_callbacks_t xnf_callbacks = { 183 MC_RESOURCES | MC_IOCTL | MC_GETCAPAB, 184 xnf_stat, 185 xnf_start, 186 xnf_stop, 187 xnf_set_promiscuous, 188 xnf_set_multicast, 189 xnf_set_mac_addr, 190 xnf_send, 191 xnf_resources, 192 xnf_ioctl, 193 xnf_getcapab 194 }; 195 196 #define GRANT_INVALID_REF 0 197 const int xnf_rx_bufs_lowat = 4 * NET_RX_RING_SIZE; 198 const int xnf_rx_bufs_hiwat = 8 * NET_RX_RING_SIZE; /* default max */ 199 200 /* DMA attributes for network ring buffer */ 201 static ddi_dma_attr_t ringbuf_dma_attr = { 202 DMA_ATTR_V0, /* version of this structure */ 203 0, /* lowest usable address */ 204 0xffffffffffffffffULL, /* highest usable address */ 205 0x7fffffff, /* maximum DMAable byte count */ 206 MMU_PAGESIZE, /* alignment in bytes */ 207 0x7ff, /* bitmap of burst sizes */ 208 1, /* minimum transfer */ 209 0xffffffffU, /* maximum transfer */ 210 0xffffffffffffffffULL, /* maximum segment length */ 211 1, /* maximum number of segments */ 212 1, /* granularity */ 213 0, /* flags (reserved) */ 214 }; 215 216 /* DMA attributes for transmit data */ 217 static ddi_dma_attr_t tx_buffer_dma_attr = { 218 DMA_ATTR_V0, /* version of this structure */ 219 0, /* lowest usable address */ 220 0xffffffffffffffffULL, /* highest usable address */ 221 0x7fffffff, /* maximum DMAable byte count */ 222 MMU_PAGESIZE, /* alignment in bytes */ 223 0x7ff, /* bitmap of burst sizes */ 224 1, /* minimum transfer */ 225 0xffffffffU, /* maximum transfer */ 226 0xffffffffffffffffULL, /* maximum segment length */ 227 1, /* maximum number of segments */ 228 1, /* granularity */ 229 0, /* flags (reserved) */ 230 }; 231 232 /* DMA attributes for a receive buffer */ 233 static ddi_dma_attr_t rx_buffer_dma_attr = { 234 DMA_ATTR_V0, /* version of this structure */ 235 0, /* lowest usable address */ 236 0xffffffffffffffffULL, /* highest usable address */ 237 0x7fffffff, /* maximum DMAable byte count */ 238 MMU_PAGESIZE, /* alignment in bytes */ 239 0x7ff, /* bitmap of burst sizes */ 240 1, /* minimum transfer */ 241 0xffffffffU, /* maximum transfer */ 242 0xffffffffffffffffULL, /* maximum segment length */ 243 1, /* maximum number of segments */ 244 1, /* granularity */ 245 0, /* flags (reserved) */ 246 }; 247 248 /* DMA access attributes for registers and descriptors */ 249 static ddi_device_acc_attr_t accattr = { 250 DDI_DEVICE_ATTR_V0, 251 DDI_STRUCTURE_LE_ACC, /* This is a little-endian device */ 252 DDI_STRICTORDER_ACC 253 }; 254 255 /* DMA access attributes for data: NOT to be byte swapped. */ 256 static ddi_device_acc_attr_t data_accattr = { 257 DDI_DEVICE_ATTR_V0, 258 DDI_NEVERSWAP_ACC, 259 DDI_STRICTORDER_ACC 260 }; 261 262 unsigned char xnf_broadcastaddr[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; 263 int xnf_diagnose = 0; /* Patchable global for diagnostic purposes */ 264 265 DDI_DEFINE_STREAM_OPS(xnf_dev_ops, nulldev, nulldev, xnf_attach, xnf_detach, 266 nodev, NULL, D_MP, NULL); 267 268 static struct modldrv xnf_modldrv = { 269 &mod_driverops, /* Type of module. 
This one is a driver */ 270 IDENT " 1.11", /* short description */ 271 &xnf_dev_ops /* driver specific ops */ 272 }; 273 274 static struct modlinkage modlinkage = { 275 MODREV_1, &xnf_modldrv, NULL 276 }; 277 278 int 279 _init(void) 280 { 281 int r; 282 283 mac_init_ops(&xnf_dev_ops, "xnf"); 284 r = mod_install(&modlinkage); 285 if (r != DDI_SUCCESS) 286 mac_fini_ops(&xnf_dev_ops); 287 288 return (r); 289 } 290 291 int 292 _fini(void) 293 { 294 return (EBUSY); /* XXPV dme: should be removable */ 295 } 296 297 int 298 _info(struct modinfo *modinfop) 299 { 300 return (mod_info(&modlinkage, modinfop)); 301 } 302 303 static int 304 xnf_setup_rings(xnf_t *xnfp) 305 { 306 int ix, err; 307 RING_IDX i; 308 struct xnf_buffer_desc *bdesc, *rbp; 309 struct xenbus_device *xsd; 310 domid_t oeid; 311 312 oeid = xvdi_get_oeid(xnfp->xnf_devinfo); 313 xsd = xvdi_get_xsd(xnfp->xnf_devinfo); 314 315 if (xnfp->xnf_tx_ring_ref != GRANT_INVALID_REF) 316 gnttab_end_foreign_access(xnfp->xnf_tx_ring_ref, 0, 0); 317 318 err = gnttab_grant_foreign_access(oeid, 319 xnf_btop(pa_to_ma(xnfp->xnf_tx_ring_phys_addr)), 0); 320 if (err <= 0) { 321 err = -err; 322 xenbus_dev_error(xsd, err, "granting access to tx ring page"); 323 goto out; 324 } 325 xnfp->xnf_tx_ring_ref = (grant_ref_t)err; 326 327 if (xnfp->xnf_rx_ring_ref != GRANT_INVALID_REF) 328 gnttab_end_foreign_access(xnfp->xnf_rx_ring_ref, 0, 0); 329 330 err = gnttab_grant_foreign_access(oeid, 331 xnf_btop(pa_to_ma(xnfp->xnf_rx_ring_phys_addr)), 0); 332 if (err <= 0) { 333 err = -err; 334 xenbus_dev_error(xsd, err, "granting access to rx ring page"); 335 goto out; 336 } 337 xnfp->xnf_rx_ring_ref = (grant_ref_t)err; 338 339 340 mutex_enter(&xnfp->xnf_intrlock); 341 342 /* 343 * Cleanup the TX ring. We just clean up any valid tx_pktinfo structs 344 * and reset the ring. Note that this can lose packets after a resume, 345 * but we expect to stagger on. 346 */ 347 mutex_enter(&xnfp->xnf_txlock); 348 349 for (i = 0; i < xnfp->xnf_n_tx; i++) { 350 struct tx_pktinfo *txp = &xnfp->xnf_tx_pkt_info[i]; 351 352 txp->id = i + 1; 353 354 if (txp->grant_ref == GRANT_INVALID_REF) { 355 ASSERT(txp->mp == NULL); 356 ASSERT(txp->bdesc == NULL); 357 continue; 358 } 359 360 if (gnttab_query_foreign_access(txp->grant_ref) != 0) 361 panic("tx grant still in use by backend domain"); 362 363 freemsg(txp->mp); 364 txp->mp = NULL; 365 366 (void) ddi_dma_unbind_handle(txp->dma_handle); 367 368 if (txp->bdesc != NULL) { 369 xnf_free_tx_buffer(txp->bdesc); 370 txp->bdesc = NULL; 371 } 372 373 (void) gnttab_end_foreign_access_ref(txp->grant_ref, 374 xnfp->xnf_tx_pages_readonly); 375 gnttab_release_grant_reference(&xnfp->xnf_gref_tx_head, 376 txp->grant_ref); 377 txp->grant_ref = GRANT_INVALID_REF; 378 } 379 380 xnfp->xnf_tx_pkt_id_list = 0; 381 xnfp->xnf_tx_ring.rsp_cons = 0; 382 xnfp->xnf_tx_ring.req_prod_pvt = 0; 383 xnfp->xnf_tx_ring.sring->req_prod = 0; 384 xnfp->xnf_tx_ring.sring->rsp_prod = 0; 385 xnfp->xnf_tx_ring.sring->rsp_event = 1; 386 387 mutex_exit(&xnfp->xnf_txlock); 388 389 /* 390 * Rebuild the RX ring. We have to rebuild the RX ring because some of 391 * our pages are currently flipped out/granted so we can't just free 392 * the RX buffers. Reclaim any unprocessed recv buffers, they won't be 393 * useable anyway since the mfn's they refer to are no longer valid. 394 * Grant the backend domain access to each hung rx buffer. 
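 *
 * (In hvcopy mode the page remains ours and is simply re-granted
 * for access; in page-flip mode it is re-offered as a transfer
 * grant. See the branch on xnf_rx_hvcopy in the loop below.)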
395 */ 396 i = xnfp->xnf_rx_ring.rsp_cons; 397 while (i++ != xnfp->xnf_rx_ring.sring->req_prod) { 398 volatile netif_rx_request_t *rxrp; 399 400 rxrp = RING_GET_REQUEST(&xnfp->xnf_rx_ring, i); 401 ix = rxrp - RING_GET_REQUEST(&xnfp->xnf_rx_ring, 0); 402 rbp = xnfp->xnf_rxpkt_bufptr[ix]; 403 if (rbp != NULL) { 404 grant_ref_t ref = rbp->grant_ref; 405 406 ASSERT(ref != GRANT_INVALID_REF); 407 if (xnfp->xnf_rx_hvcopy) { 408 pfn_t pfn = xnf_btop(rbp->buf_phys); 409 mfn_t mfn = pfn_to_mfn(pfn); 410 411 gnttab_grant_foreign_access_ref(ref, oeid, 412 mfn, 0); 413 } else { 414 gnttab_grant_foreign_transfer_ref(ref, 415 oeid, 0); 416 } 417 rxrp->id = ix; 418 rxrp->gref = ref; 419 } 420 } 421 422 /* 423 * Reset the ring pointers to initial state. 424 * Hang buffers for any empty ring slots. 425 */ 426 xnfp->xnf_rx_ring.rsp_cons = 0; 427 xnfp->xnf_rx_ring.req_prod_pvt = 0; 428 xnfp->xnf_rx_ring.sring->req_prod = 0; 429 xnfp->xnf_rx_ring.sring->rsp_prod = 0; 430 xnfp->xnf_rx_ring.sring->rsp_event = 1; 431 for (i = 0; i < NET_RX_RING_SIZE; i++) { 432 xnfp->xnf_rx_ring.req_prod_pvt = i; 433 if (xnfp->xnf_rxpkt_bufptr[i] != NULL) 434 continue; 435 if ((bdesc = xnf_get_buffer(xnfp)) == NULL) 436 break; 437 rx_buffer_hang(xnfp, bdesc); 438 } 439 xnfp->xnf_rx_ring.req_prod_pvt = i; 440 /* LINTED: constant in conditional context */ 441 RING_PUSH_REQUESTS(&xnfp->xnf_rx_ring); 442 443 mutex_exit(&xnfp->xnf_intrlock); 444 445 return (0); 446 447 out: 448 if (xnfp->xnf_tx_ring_ref != GRANT_INVALID_REF) 449 gnttab_end_foreign_access(xnfp->xnf_tx_ring_ref, 0, 0); 450 xnfp->xnf_tx_ring_ref = GRANT_INVALID_REF; 451 452 if (xnfp->xnf_rx_ring_ref != GRANT_INVALID_REF) 453 gnttab_end_foreign_access(xnfp->xnf_rx_ring_ref, 0, 0); 454 xnfp->xnf_rx_ring_ref = GRANT_INVALID_REF; 455 456 return (err); 457 } 458 459 460 /* Called when the upper layers free a message we passed upstream */ 461 static void 462 xnf_copy_rcv_complete(struct xnf_buffer_desc *bdesc) 463 { 464 (void) ddi_dma_unbind_handle(bdesc->dma_handle); 465 ddi_dma_mem_free(&bdesc->acc_handle); 466 ddi_dma_free_handle(&bdesc->dma_handle); 467 kmem_free(bdesc, sizeof (*bdesc)); 468 } 469 470 471 /* 472 * Connect driver to back end, called to set up communication with 473 * back end driver both initially and on resume after restore/migrate. 474 */ 475 void 476 xnf_be_connect(xnf_t *xnfp) 477 { 478 char mac[ETHERADDRL * 3]; 479 const char *message; 480 xenbus_transaction_t xbt; 481 struct xenbus_device *xsd; 482 char *xsname; 483 int err, be_no_cksum_offload; 484 485 ASSERT(!xnfp->xnf_connected); 486 487 xsd = xvdi_get_xsd(xnfp->xnf_devinfo); 488 xsname = xvdi_get_xsname(xnfp->xnf_devinfo); 489 490 err = xenbus_scanf(XBT_NULL, xvdi_get_oename(xnfp->xnf_devinfo), "mac", 491 "%s", (char *)&mac[0]); 492 if (err != 0) { 493 /* 494 * bad: we're supposed to be set up with a proper mac 495 * addr. 
at this point 496 */ 497 cmn_err(CE_WARN, "%s%d: no mac address", 498 ddi_driver_name(xnfp->xnf_devinfo), 499 ddi_get_instance(xnfp->xnf_devinfo)); 500 return; 501 } 502 503 if (ether_aton(mac, xnfp->xnf_mac_addr) != ETHERADDRL) { 504 err = ENOENT; 505 xenbus_dev_error(xsd, ENOENT, "parsing %s/mac", xsname); 506 return; 507 } 508 509 err = xnf_setup_rings(xnfp); 510 if (err != 0) { 511 cmn_err(CE_WARN, "failed to set up tx/rx rings"); 512 xenbus_dev_error(xsd, err, "setting up ring"); 513 return; 514 } 515 516 err = xenbus_scanf(XBT_NULL, xvdi_get_oename(xnfp->xnf_devinfo), 517 "feature-no-csum-offload", "%d", &be_no_cksum_offload); 518 /* 519 * If we fail to read the store we assume that the key is 520 * absent, implying an older domain at the far end. Older 521 * domains always support checksum offload. 522 */ 523 if (err != 0) 524 be_no_cksum_offload = 0; 525 /* 526 * If the far end cannot do checksum offload or we do not wish 527 * to do it, disable it. 528 */ 529 if ((be_no_cksum_offload == 1) || !xnfp->xnf_cksum_offload) 530 xnfp->xnf_cksum_offload = B_FALSE; 531 532 again: 533 err = xenbus_transaction_start(&xbt); 534 if (err != 0) { 535 xenbus_dev_error(xsd, EIO, "starting transaction"); 536 return; 537 } 538 539 err = xenbus_printf(xbt, xsname, "tx-ring-ref", "%u", 540 xnfp->xnf_tx_ring_ref); 541 if (err != 0) { 542 message = "writing tx ring-ref"; 543 goto abort_transaction; 544 } 545 546 err = xenbus_printf(xbt, xsname, "rx-ring-ref", "%u", 547 xnfp->xnf_rx_ring_ref); 548 if (err != 0) { 549 message = "writing rx ring-ref"; 550 goto abort_transaction; 551 } 552 553 err = xenbus_printf(xbt, xsname, "event-channel", "%u", 554 xnfp->xnf_evtchn); 555 if (err != 0) { 556 message = "writing event-channel"; 557 goto abort_transaction; 558 } 559 560 err = xenbus_printf(xbt, xsname, "feature-rx-notify", "%d", 1); 561 if (err != 0) { 562 message = "writing feature-rx-notify"; 563 goto abort_transaction; 564 } 565 566 if (!xnfp->xnf_tx_pages_readonly) { 567 err = xenbus_printf(xbt, xsname, "feature-tx-writable", 568 "%d", 1); 569 if (err != 0) { 570 message = "writing feature-tx-writable"; 571 goto abort_transaction; 572 } 573 } 574 575 err = xenbus_printf(xbt, xsname, "feature-no-csum-offload", "%d", 576 xnfp->xnf_cksum_offload ? 0 : 1); 577 if (err != 0) { 578 message = "writing feature-no-csum-offload"; 579 goto abort_transaction; 580 } 581 err = xenbus_printf(xbt, xsname, "request-rx-copy", "%d", 582 xnfp->xnf_rx_hvcopy ? 1 : 0); 583 if (err != 0) { 584 message = "writing request-rx-copy"; 585 goto abort_transaction; 586 } 587 588 err = xenbus_printf(xbt, xsname, "state", "%d", XenbusStateConnected); 589 if (err != 0) { 590 message = "writing frontend XenbusStateConnected"; 591 goto abort_transaction; 592 } 593 594 err = xenbus_transaction_end(xbt, 0); 595 if (err != 0) { 596 if (err == EAGAIN) 597 goto again; 598 xenbus_dev_error(xsd, err, "completing transaction"); 599 } 600 601 return; 602 603 abort_transaction: 604 (void) xenbus_transaction_end(xbt, 1); 605 xenbus_dev_error(xsd, err, "%s", message); 606 } 607 608 /* 609 * attach(9E) -- Attach a device to the system 610 * 611 * Called once for each board successfully probed. 
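 *
 * (Both DDI_ATTACH and DDI_RESUME arrive here. On resume the code
 * below only re-allocates and re-binds its event channel, reconnects
 * to the backend via xnf_be_connect() and reports the MAC address to
 * the MAC layer again so that IP sends a gratuitous ARP.)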
612 */ 613 static int 614 xnf_attach(dev_info_t *devinfo, ddi_attach_cmd_t cmd) 615 { 616 mac_register_t *macp; 617 xnf_t *xnfp; 618 int err; 619 620 #ifdef XNF_DEBUG 621 if (xnfdebug & XNF_DEBUG_DDI) 622 printf("xnf%d: attach(0x%p)\n", ddi_get_instance(devinfo), 623 (void *)devinfo); 624 #endif 625 626 switch (cmd) { 627 case DDI_RESUME: 628 xnfp = ddi_get_driver_private(devinfo); 629 630 (void) xvdi_resume(devinfo); 631 (void) xvdi_alloc_evtchn(devinfo); 632 xnfp->xnf_evtchn = xvdi_get_evtchn(devinfo); 633 #ifdef XPV_HVM_DRIVER 634 ec_bind_evtchn_to_handler(xnfp->xnf_evtchn, IPL_VIF, xnf_intr, 635 xnfp); 636 #else 637 (void) ddi_add_intr(devinfo, 0, NULL, NULL, xnf_intr, 638 (caddr_t)xnfp); 639 #endif 640 xnf_be_connect(xnfp); 641 /* 642 * Our MAC address may have changed if we're resuming: 643 * - on a different host 644 * - on the same one and got a different MAC address 645 * because we didn't specify one of our own. 646 * so it's useful to claim that it changed in order that 647 * IP send out a gratuitous ARP. 648 */ 649 mac_unicst_update(xnfp->xnf_mh, xnfp->xnf_mac_addr); 650 return (DDI_SUCCESS); 651 652 case DDI_ATTACH: 653 break; 654 655 default: 656 return (DDI_FAILURE); 657 } 658 659 /* 660 * Allocate gld_mac_info_t and xnf_instance structures 661 */ 662 macp = mac_alloc(MAC_VERSION); 663 if (macp == NULL) 664 return (DDI_FAILURE); 665 xnfp = kmem_zalloc(sizeof (*xnfp), KM_SLEEP); 666 667 macp->m_dip = devinfo; 668 macp->m_driver = xnfp; 669 xnfp->xnf_devinfo = devinfo; 670 671 macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER; 672 macp->m_src_addr = xnfp->xnf_mac_addr; 673 macp->m_callbacks = &xnf_callbacks; 674 macp->m_min_sdu = 0; 675 macp->m_max_sdu = XNF_MAXPKT; 676 677 xnfp->xnf_running = B_FALSE; 678 xnfp->xnf_connected = B_FALSE; 679 xnfp->xnf_cksum_offload = xnf_cksum_offload; 680 xnfp->xnf_tx_pages_readonly = xnf_tx_pages_readonly; 681 682 xnfp->xnf_rx_hvcopy = xnf_hvcopy_peer_status(devinfo) && xnf_rx_hvcopy; 683 #ifdef XPV_HVM_DRIVER 684 /* 685 * Report our version to dom0. 686 */ 687 if (xenbus_printf(XBT_NULL, "hvmpv/xnf", "version", "%d", 688 HVMPV_XNF_VERS)) 689 cmn_err(CE_WARN, "xnf: couldn't write version\n"); 690 691 if (!xnfp->xnf_rx_hvcopy) { 692 cmn_err(CE_WARN, "The xnf driver requires a dom0 that " 693 "supports 'feature-rx-copy'"); 694 goto failure; 695 } 696 #endif 697 698 /* 699 * Get the iblock cookie with which to initialize the mutexes. 700 */ 701 if (ddi_get_iblock_cookie(devinfo, 0, &xnfp->xnf_icookie) 702 != DDI_SUCCESS) 703 goto failure; 704 /* 705 * Driver locking strategy: the txlock protects all paths 706 * through the driver, except the interrupt thread. 707 * If the interrupt thread needs to do something which could 708 * affect the operation of any other part of the driver, 709 * it needs to acquire the txlock mutex. 
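 *
 * As an illustrative sketch (this is exactly what xnf_start(),
 * xnf_stop() and the DDI_SUSPEND path do), code that must quiesce
 * both receive and transmit processing takes the two mutexes in the
 * order intrlock, then txlock:
 *
 *	mutex_enter(&xnfp->xnf_intrlock);
 *	mutex_enter(&xnfp->xnf_txlock);
 *	... update shared state such as xnfp->xnf_running ...
 *	mutex_exit(&xnfp->xnf_txlock);
 *	mutex_exit(&xnfp->xnf_intrlock);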
710 */ 711 mutex_init(&xnfp->xnf_tx_buf_mutex, 712 NULL, MUTEX_DRIVER, xnfp->xnf_icookie); 713 mutex_init(&xnfp->xnf_rx_buf_mutex, 714 NULL, MUTEX_DRIVER, xnfp->xnf_icookie); 715 mutex_init(&xnfp->xnf_txlock, 716 NULL, MUTEX_DRIVER, xnfp->xnf_icookie); 717 mutex_init(&xnfp->xnf_intrlock, 718 NULL, MUTEX_DRIVER, xnfp->xnf_icookie); 719 cv_init(&xnfp->xnf_cv, NULL, CV_DEFAULT, NULL); 720 721 if (gnttab_alloc_grant_references(NET_TX_RING_SIZE, 722 &xnfp->xnf_gref_tx_head) < 0) { 723 cmn_err(CE_WARN, "xnf%d: can't alloc tx grant refs", 724 ddi_get_instance(xnfp->xnf_devinfo)); 725 goto failure_1; 726 } 727 if (gnttab_alloc_grant_references(NET_RX_RING_SIZE, 728 &xnfp->xnf_gref_rx_head) < 0) { 729 cmn_err(CE_WARN, "xnf%d: can't alloc rx grant refs", 730 ddi_get_instance(xnfp->xnf_devinfo)); 731 goto failure_1; 732 } 733 if (xnf_alloc_dma_resources(xnfp) == DDI_FAILURE) { 734 cmn_err(CE_WARN, "xnf%d: failed to allocate and initialize " 735 "driver data structures", 736 ddi_get_instance(xnfp->xnf_devinfo)); 737 goto failure_1; 738 } 739 740 xnfp->xnf_rx_ring.sring->rsp_event = 741 xnfp->xnf_tx_ring.sring->rsp_event = 1; 742 743 xnfp->xnf_tx_ring_ref = GRANT_INVALID_REF; 744 xnfp->xnf_rx_ring_ref = GRANT_INVALID_REF; 745 746 /* set driver private pointer now */ 747 ddi_set_driver_private(devinfo, xnfp); 748 749 if (xvdi_add_event_handler(devinfo, XS_OE_STATE, oe_state_change) 750 != DDI_SUCCESS) 751 goto failure_1; 752 753 if (!xnf_kstat_init(xnfp)) 754 goto failure_2; 755 756 /* 757 * Allocate an event channel, add the interrupt handler and 758 * bind it to the event channel. 759 */ 760 (void) xvdi_alloc_evtchn(devinfo); 761 xnfp->xnf_evtchn = xvdi_get_evtchn(devinfo); 762 #ifdef XPV_HVM_DRIVER 763 ec_bind_evtchn_to_handler(xnfp->xnf_evtchn, IPL_VIF, xnf_intr, xnfp); 764 #else 765 (void) ddi_add_intr(devinfo, 0, NULL, NULL, xnf_intr, (caddr_t)xnfp); 766 #endif 767 768 /* 769 * connect to the backend 770 */ 771 xnf_be_connect(xnfp); 772 773 err = mac_register(macp, &xnfp->xnf_mh); 774 mac_free(macp); 775 macp = NULL; 776 if (err != 0) 777 goto failure_3; 778 779 #ifdef XPV_HVM_DRIVER 780 /* 781 * In the HVM case, this driver essentially replaces a driver for 782 * a 'real' PCI NIC. Without the "model" property set to 783 * "Ethernet controller", like the PCI code does, netbooting does 784 * not work correctly, as strplumb_get_netdev_path() will not find 785 * this interface. 
786 */ 787 (void) ndi_prop_update_string(DDI_DEV_T_NONE, devinfo, "model", 788 "Ethernet controller"); 789 #endif 790 791 return (DDI_SUCCESS); 792 793 failure_3: 794 kstat_delete(xnfp->xnf_kstat_aux); 795 796 failure_2: 797 xvdi_remove_event_handler(devinfo, XS_OE_STATE); 798 #ifdef XPV_HVM_DRIVER 799 ec_unbind_evtchn(xnfp->xnf_evtchn); 800 xvdi_free_evtchn(devinfo); 801 #else 802 ddi_remove_intr(devinfo, 0, xnfp->xnf_icookie); 803 #endif 804 xnfp->xnf_evtchn = INVALID_EVTCHN; 805 806 failure_1: 807 xnf_release_dma_resources(xnfp); 808 cv_destroy(&xnfp->xnf_cv); 809 mutex_destroy(&xnfp->xnf_rx_buf_mutex); 810 mutex_destroy(&xnfp->xnf_txlock); 811 mutex_destroy(&xnfp->xnf_intrlock); 812 813 failure: 814 kmem_free(xnfp, sizeof (*xnfp)); 815 if (macp != NULL) 816 mac_free(macp); 817 818 return (DDI_FAILURE); 819 } 820 821 /* detach(9E) -- Detach a device from the system */ 822 static int 823 xnf_detach(dev_info_t *devinfo, ddi_detach_cmd_t cmd) 824 { 825 xnf_t *xnfp; /* Our private device info */ 826 int i; 827 828 #ifdef XNF_DEBUG 829 if (xnfdebug & XNF_DEBUG_DDI) 830 printf("xnf_detach(0x%p)\n", (void *)devinfo); 831 #endif 832 833 xnfp = ddi_get_driver_private(devinfo); 834 835 switch (cmd) { 836 case DDI_SUSPEND: 837 #ifdef XPV_HVM_DRIVER 838 ec_unbind_evtchn(xnfp->xnf_evtchn); 839 xvdi_free_evtchn(devinfo); 840 #else 841 ddi_remove_intr(devinfo, 0, xnfp->xnf_icookie); 842 #endif 843 844 xvdi_suspend(devinfo); 845 846 mutex_enter(&xnfp->xnf_intrlock); 847 mutex_enter(&xnfp->xnf_txlock); 848 849 xnfp->xnf_evtchn = INVALID_EVTCHN; 850 xnfp->xnf_connected = B_FALSE; 851 mutex_exit(&xnfp->xnf_txlock); 852 mutex_exit(&xnfp->xnf_intrlock); 853 return (DDI_SUCCESS); 854 855 case DDI_DETACH: 856 break; 857 858 default: 859 return (DDI_FAILURE); 860 } 861 862 if (xnfp->xnf_connected) 863 return (DDI_FAILURE); 864 865 /* Wait for receive buffers to be returned; give up after 5 seconds */ 866 i = 50; 867 868 mutex_enter(&xnfp->xnf_rx_buf_mutex); 869 while (xnfp->xnf_rx_bufs_outstanding > 0) { 870 mutex_exit(&xnfp->xnf_rx_buf_mutex); 871 delay(drv_usectohz(100000)); 872 if (--i == 0) { 873 cmn_err(CE_WARN, 874 "xnf%d: never reclaimed all the " 875 "receive buffers. Still have %d " 876 "buffers outstanding.", 877 ddi_get_instance(xnfp->xnf_devinfo), 878 xnfp->xnf_rx_bufs_outstanding); 879 return (DDI_FAILURE); 880 } 881 mutex_enter(&xnfp->xnf_rx_buf_mutex); 882 } 883 mutex_exit(&xnfp->xnf_rx_buf_mutex); 884 885 kstat_delete(xnfp->xnf_kstat_aux); 886 887 if (mac_unregister(xnfp->xnf_mh) != 0) 888 return (DDI_FAILURE); 889 890 /* Stop the receiver */ 891 xnf_stop(xnfp); 892 893 xvdi_remove_event_handler(devinfo, XS_OE_STATE); 894 895 /* Remove the interrupt */ 896 #ifdef XPV_HVM_DRIVER 897 ec_unbind_evtchn(xnfp->xnf_evtchn); 898 xvdi_free_evtchn(devinfo); 899 #else 900 ddi_remove_intr(devinfo, 0, xnfp->xnf_icookie); 901 #endif 902 903 /* Release any pending xmit mblks */ 904 xnf_release_mblks(xnfp); 905 906 /* Release all DMA resources */ 907 xnf_release_dma_resources(xnfp); 908 909 cv_destroy(&xnfp->xnf_cv); 910 mutex_destroy(&xnfp->xnf_rx_buf_mutex); 911 mutex_destroy(&xnfp->xnf_txlock); 912 mutex_destroy(&xnfp->xnf_intrlock); 913 914 kmem_free(xnfp, sizeof (*xnfp)); 915 916 return (DDI_SUCCESS); 917 } 918 919 /* 920 * xnf_set_mac_addr() -- set the physical network address on the board. 
921 */ 922 /*ARGSUSED*/ 923 static int 924 xnf_set_mac_addr(void *arg, const uint8_t *macaddr) 925 { 926 xnf_t *xnfp = arg; 927 928 #ifdef XNF_DEBUG 929 if (xnfdebug & XNF_DEBUG_TRACE) 930 printf("xnf%d: set_mac_addr(0x%p): " 931 "%02x:%02x:%02x:%02x:%02x:%02x\n", 932 ddi_get_instance(xnfp->xnf_devinfo), 933 (void *)xnfp, macaddr[0], macaddr[1], macaddr[2], 934 macaddr[3], macaddr[4], macaddr[5]); 935 #endif 936 /* 937 * We can't set our macaddr. 938 * 939 * XXPV dme: Why not? 940 */ 941 return (ENOTSUP); 942 } 943 944 /* 945 * xnf_set_multicast() -- set (enable) or disable a multicast address. 946 * 947 * Program the hardware to enable/disable the multicast address 948 * in "mcast". Enable if "add" is true, disable if false. 949 */ 950 /*ARGSUSED*/ 951 static int 952 xnf_set_multicast(void *arg, boolean_t add, const uint8_t *mca) 953 { 954 xnf_t *xnfp = arg; 955 956 #ifdef XNF_DEBUG 957 if (xnfdebug & XNF_DEBUG_TRACE) 958 printf("xnf%d set_multicast(0x%p): " 959 "%02x:%02x:%02x:%02x:%02x:%02x\n", 960 ddi_get_instance(xnfp->xnf_devinfo), 961 (void *)xnfp, mca[0], mca[1], mca[2], 962 mca[3], mca[4], mca[5]); 963 #endif 964 965 /* 966 * XXPV dme: Ideally we'd relay the address to the backend for 967 * enabling. The protocol doesn't support that (interesting 968 * extension), so we simply succeed and hope that the relevant 969 * packets are going to arrive. 970 * 971 * If protocol support is added for enable/disable then we'll 972 * need to keep a list of those in use and re-add on resume. 973 */ 974 return (0); 975 } 976 977 /* 978 * xnf_set_promiscuous() -- set or reset promiscuous mode on the board 979 * 980 * Program the hardware to enable/disable promiscuous mode. 981 */ 982 /*ARGSUSED*/ 983 static int 984 xnf_set_promiscuous(void *arg, boolean_t on) 985 { 986 xnf_t *xnfp = arg; 987 988 #ifdef XNF_DEBUG 989 if (xnfdebug & XNF_DEBUG_TRACE) 990 printf("xnf%d set_promiscuous(0x%p, %x)\n", 991 ddi_get_instance(xnfp->xnf_devinfo), 992 (void *)xnfp, on); 993 #endif 994 /* 995 * We can't really do this, but we pretend that we can in 996 * order that snoop will work. 997 */ 998 return (0); 999 } 1000 1001 /* 1002 * Clean buffers that we have responses for from the transmit ring. 
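 *
 * Returns the number of free request slots on the transmit ring
 * (RING_FREE_REQUESTS()). The loop below is the usual shared-ring
 * consumer pattern, roughly (illustrative sketch only):
 *
 *	next = ring->sring->rsp_prod;	(then membar_consumer())
 *	for (i = ring->rsp_cons; i != next; i++)
 *		reclaim RING_GET_RESPONSE(ring, i)->id;
 *	ring->rsp_cons = next;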
1003 */ 1004 static int 1005 xnf_clean_tx_ring(xnf_t *xnfp) 1006 { 1007 RING_IDX next_resp, i; 1008 struct tx_pktinfo *reap; 1009 int id; 1010 grant_ref_t ref; 1011 1012 ASSERT(MUTEX_HELD(&xnfp->xnf_txlock)); 1013 1014 while (RING_HAS_UNCONSUMED_RESPONSES(&xnfp->xnf_tx_ring)) { 1015 /* 1016 * index of next transmission ack 1017 */ 1018 next_resp = xnfp->xnf_tx_ring.sring->rsp_prod; 1019 membar_consumer(); 1020 /* 1021 * Clean tx packets from ring that we have responses for 1022 */ 1023 for (i = xnfp->xnf_tx_ring.rsp_cons; i != next_resp; i++) { 1024 id = RING_GET_RESPONSE(&xnfp->xnf_tx_ring, i)->id; 1025 reap = &xnfp->xnf_tx_pkt_info[id]; 1026 ref = reap->grant_ref; 1027 /* 1028 * Return id to free list 1029 */ 1030 reap->id = xnfp->xnf_tx_pkt_id_list; 1031 xnfp->xnf_tx_pkt_id_list = id; 1032 if (gnttab_query_foreign_access(ref) != 0) 1033 panic("tx grant still in use " 1034 "by backend domain"); 1035 (void) ddi_dma_unbind_handle(reap->dma_handle); 1036 (void) gnttab_end_foreign_access_ref(ref, 1037 xnfp->xnf_tx_pages_readonly); 1038 gnttab_release_grant_reference(&xnfp->xnf_gref_tx_head, 1039 ref); 1040 freemsg(reap->mp); 1041 reap->mp = NULL; 1042 reap->grant_ref = GRANT_INVALID_REF; 1043 if (reap->bdesc != NULL) 1044 xnf_free_tx_buffer(reap->bdesc); 1045 reap->bdesc = NULL; 1046 } 1047 xnfp->xnf_tx_ring.rsp_cons = next_resp; 1048 membar_enter(); 1049 } 1050 1051 return (RING_FREE_REQUESTS(&xnfp->xnf_tx_ring)); 1052 } 1053 1054 /* 1055 * If we need to pull up data from either a packet that crosses a page 1056 * boundary or consisting of multiple mblks, do it here. We allocate 1057 * a page aligned buffer and copy the data into it. The header for the 1058 * allocated buffer is returned. (which is also allocated here) 1059 */ 1060 static struct xnf_buffer_desc * 1061 xnf_pullupmsg(xnf_t *xnfp, mblk_t *mp) 1062 { 1063 struct xnf_buffer_desc *bdesc; 1064 mblk_t *mptr; 1065 caddr_t bp; 1066 int len; 1067 1068 /* 1069 * get a xmit buffer from the xmit buffer pool 1070 */ 1071 mutex_enter(&xnfp->xnf_rx_buf_mutex); 1072 bdesc = xnf_get_tx_buffer(xnfp); 1073 mutex_exit(&xnfp->xnf_rx_buf_mutex); 1074 if (bdesc == NULL) 1075 return (bdesc); 1076 /* 1077 * Copy the data into the buffer 1078 */ 1079 xnfp->xnf_stat_tx_pullup++; 1080 bp = bdesc->buf; 1081 for (mptr = mp; mptr != NULL; mptr = mptr->b_cont) { 1082 len = mptr->b_wptr - mptr->b_rptr; 1083 bcopy(mptr->b_rptr, bp, len); 1084 bp += len; 1085 } 1086 return (bdesc); 1087 } 1088 1089 /* 1090 * xnf_send_one() -- send a packet 1091 * 1092 * Called when a packet is ready to be transmitted. A pointer to an 1093 * M_DATA message that contains the packet is passed to this routine. 1094 * At least the complete LLC header is contained in the message's 1095 * first message block, and the remainder of the packet is contained 1096 * within additional M_DATA message blocks linked to the first 1097 * message block. 
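 *
 * Returns B_TRUE once the packet has been consumed (posted on the
 * transmit ring, or dropped because it is oversized) and B_FALSE
 * when the caller should retry later, e.g. when the ring is full or
 * DMA resources are temporarily unavailable.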
1098 * 1099 */ 1100 static boolean_t 1101 xnf_send_one(xnf_t *xnfp, mblk_t *mp) 1102 { 1103 struct xnf_buffer_desc *xmitbuf; 1104 struct tx_pktinfo *txp_info; 1105 mblk_t *mptr; 1106 ddi_dma_cookie_t dma_cookie; 1107 RING_IDX slot; 1108 int length = 0, i, pktlen = 0, rc, tx_id; 1109 int tx_ring_freespace, page_oops; 1110 uint_t ncookies; 1111 volatile netif_tx_request_t *txrp; 1112 caddr_t bufaddr; 1113 grant_ref_t ref; 1114 unsigned long mfn; 1115 uint32_t pflags; 1116 domid_t oeid; 1117 1118 #ifdef XNF_DEBUG 1119 if (xnfdebug & XNF_DEBUG_SEND) 1120 printf("xnf%d send(0x%p, 0x%p)\n", 1121 ddi_get_instance(xnfp->xnf_devinfo), 1122 (void *)xnfp, (void *)mp); 1123 #endif 1124 1125 ASSERT(mp != NULL); 1126 ASSERT(mp->b_next == NULL); 1127 ASSERT(MUTEX_HELD(&xnfp->xnf_txlock)); 1128 1129 tx_ring_freespace = xnf_clean_tx_ring(xnfp); 1130 ASSERT(tx_ring_freespace >= 0); 1131 1132 oeid = xvdi_get_oeid(xnfp->xnf_devinfo); 1133 xnfp->xnf_stat_tx_attempt++; 1134 /* 1135 * If there are no xmit ring slots available, return. 1136 */ 1137 if (tx_ring_freespace == 0) { 1138 xnfp->xnf_stat_tx_defer++; 1139 return (B_FALSE); /* Send should be retried */ 1140 } 1141 1142 slot = xnfp->xnf_tx_ring.req_prod_pvt; 1143 /* Count the number of mblks in message and compute packet size */ 1144 for (i = 0, mptr = mp; mptr != NULL; mptr = mptr->b_cont, i++) 1145 pktlen += (mptr->b_wptr - mptr->b_rptr); 1146 1147 /* Make sure packet isn't too large */ 1148 if (pktlen > XNF_FRAMESIZE) { 1149 cmn_err(CE_WARN, "xnf%d: oversized packet (%d bytes) dropped", 1150 ddi_get_instance(xnfp->xnf_devinfo), pktlen); 1151 freemsg(mp); 1152 return (B_TRUE); 1153 } 1154 1155 /* 1156 * Test if we cross a page boundary with our buffer 1157 */ 1158 page_oops = (i == 1) && 1159 (xnf_btop((size_t)mp->b_rptr) != 1160 xnf_btop((size_t)(mp->b_rptr + pktlen))); 1161 /* 1162 * XXPV - unfortunately, the Xen virtual net device currently 1163 * doesn't support multiple packet frags, so this will always 1164 * end up doing the pullup if we got more than one packet. 1165 */ 1166 if (i > xnf_max_tx_frags || page_oops) { 1167 if (page_oops) 1168 xnfp->xnf_stat_tx_pagebndry++; 1169 if ((xmitbuf = xnf_pullupmsg(xnfp, mp)) == NULL) { 1170 /* could not allocate resources? 
*/ 1171 #ifdef XNF_DEBUG 1172 cmn_err(CE_WARN, "xnf%d: pullupmsg failed", 1173 ddi_get_instance(xnfp->xnf_devinfo)); 1174 #endif 1175 xnfp->xnf_stat_tx_defer++; 1176 return (B_FALSE); /* Retry send */ 1177 } 1178 bufaddr = xmitbuf->buf; 1179 } else { 1180 xmitbuf = NULL; 1181 bufaddr = (caddr_t)mp->b_rptr; 1182 } 1183 1184 /* set up data descriptor */ 1185 length = pktlen; 1186 1187 /* 1188 * Get packet id from free list 1189 */ 1190 tx_id = xnfp->xnf_tx_pkt_id_list; 1191 ASSERT(tx_id < NET_TX_RING_SIZE); 1192 txp_info = &xnfp->xnf_tx_pkt_info[tx_id]; 1193 xnfp->xnf_tx_pkt_id_list = txp_info->id; 1194 txp_info->id = tx_id; 1195 1196 /* Prepare for DMA mapping of tx buffer(s) */ 1197 rc = ddi_dma_addr_bind_handle(txp_info->dma_handle, 1198 NULL, bufaddr, length, DDI_DMA_WRITE | DDI_DMA_STREAMING, 1199 DDI_DMA_DONTWAIT, 0, &dma_cookie, &ncookies); 1200 if (rc != DDI_DMA_MAPPED) { 1201 ASSERT(rc != DDI_DMA_INUSE); 1202 ASSERT(rc != DDI_DMA_PARTIAL_MAP); 1203 /* 1204 * Return id to free list 1205 */ 1206 txp_info->id = xnfp->xnf_tx_pkt_id_list; 1207 xnfp->xnf_tx_pkt_id_list = tx_id; 1208 if (rc == DDI_DMA_NORESOURCES) { 1209 xnfp->xnf_stat_tx_defer++; 1210 return (B_FALSE); /* Retry later */ 1211 } 1212 #ifdef XNF_DEBUG 1213 cmn_err(CE_WARN, "xnf%d: bind_handle failed (%x)", 1214 ddi_get_instance(xnfp->xnf_devinfo), rc); 1215 #endif 1216 return (B_FALSE); 1217 } 1218 1219 ASSERT(ncookies == 1); 1220 ref = gnttab_claim_grant_reference(&xnfp->xnf_gref_tx_head); 1221 ASSERT((signed short)ref >= 0); 1222 mfn = xnf_btop(pa_to_ma((paddr_t)dma_cookie.dmac_laddress)); 1223 gnttab_grant_foreign_access_ref(ref, oeid, mfn, 1224 xnfp->xnf_tx_pages_readonly); 1225 txp_info->grant_ref = ref; 1226 txrp = RING_GET_REQUEST(&xnfp->xnf_tx_ring, slot); 1227 txrp->gref = ref; 1228 txrp->size = dma_cookie.dmac_size; 1229 txrp->offset = (uintptr_t)bufaddr & PAGEOFFSET; 1230 txrp->id = tx_id; 1231 txrp->flags = 0; 1232 hcksum_retrieve(mp, NULL, NULL, NULL, NULL, NULL, NULL, &pflags); 1233 if (pflags != 0) { 1234 ASSERT(xnfp->xnf_cksum_offload); 1235 /* 1236 * If the local protocol stack requests checksum 1237 * offload we set the 'checksum blank' flag, 1238 * indicating to the peer that we need the checksum 1239 * calculated for us. 1240 * 1241 * We _don't_ set the validated flag, because we haven't 1242 * validated that the data and the checksum match. 1243 */ 1244 txrp->flags |= NETTXF_csum_blank; 1245 xnfp->xnf_stat_tx_cksum_deferred++; 1246 } 1247 membar_producer(); 1248 xnfp->xnf_tx_ring.req_prod_pvt = slot + 1; 1249 1250 txp_info->mp = mp; 1251 txp_info->bdesc = xmitbuf; 1252 1253 xnfp->xnf_stat_opackets++; 1254 xnfp->xnf_stat_obytes += pktlen; 1255 1256 return (B_TRUE); /* successful transmit attempt */ 1257 } 1258 1259 mblk_t * 1260 xnf_send(void *arg, mblk_t *mp) 1261 { 1262 xnf_t *xnfp = arg; 1263 mblk_t *next; 1264 boolean_t sent_something = B_FALSE; 1265 1266 mutex_enter(&xnfp->xnf_txlock); 1267 1268 /* 1269 * Transmission attempts should be impossible without having 1270 * previously called xnf_start(). 
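 *
 * Any mblks that cannot be posted are handed back to the caller
 * (the chain returned at the end of this function); transmission
 * resumes when xnf_intr() reclaims ring space and calls
 * mac_tx_update().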
1271 */ 1272 ASSERT(xnfp->xnf_running); 1273 1274 /* 1275 * Wait for getting connected to the backend 1276 */ 1277 while (!xnfp->xnf_connected) { 1278 cv_wait(&xnfp->xnf_cv, &xnfp->xnf_txlock); 1279 } 1280 1281 while (mp != NULL) { 1282 next = mp->b_next; 1283 mp->b_next = NULL; 1284 1285 if (!xnf_send_one(xnfp, mp)) { 1286 mp->b_next = next; 1287 break; 1288 } 1289 1290 mp = next; 1291 sent_something = B_TRUE; 1292 } 1293 1294 if (sent_something) { 1295 boolean_t notify; 1296 1297 /* LINTED: constant in conditional context */ 1298 RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&xnfp->xnf_tx_ring, 1299 notify); 1300 if (notify) 1301 ec_notify_via_evtchn(xnfp->xnf_evtchn); 1302 } 1303 1304 mutex_exit(&xnfp->xnf_txlock); 1305 1306 return (mp); 1307 } 1308 1309 /* 1310 * xnf_intr() -- ring interrupt service routine 1311 */ 1312 static uint_t 1313 xnf_intr(caddr_t arg) 1314 { 1315 xnf_t *xnfp = (xnf_t *)arg; 1316 int tx_ring_space; 1317 1318 mutex_enter(&xnfp->xnf_intrlock); 1319 1320 /* 1321 * If not connected to the peer or not started by the upper 1322 * layers we cannot usefully handle interrupts. 1323 */ 1324 if (!(xnfp->xnf_connected && xnfp->xnf_running)) { 1325 mutex_exit(&xnfp->xnf_intrlock); 1326 xnfp->xnf_stat_unclaimed_interrupts++; 1327 return (DDI_INTR_UNCLAIMED); 1328 } 1329 1330 #ifdef XNF_DEBUG 1331 if (xnfdebug & XNF_DEBUG_INT) 1332 printf("xnf%d intr(0x%p)\n", 1333 ddi_get_instance(xnfp->xnf_devinfo), (void *)xnfp); 1334 #endif 1335 if (RING_HAS_UNCONSUMED_RESPONSES(&xnfp->xnf_rx_ring)) { 1336 mblk_t *mp; 1337 1338 if (xnfp->xnf_rx_hvcopy) 1339 mp = xnf_process_hvcopy_recv(xnfp); 1340 else 1341 mp = xnf_process_recv(xnfp); 1342 1343 if (mp != NULL) 1344 mac_rx(xnfp->xnf_mh, xnfp->xnf_rx_handle, mp); 1345 } 1346 1347 /* 1348 * Clean tx ring and try to start any blocked xmit streams if 1349 * there is now some space. 1350 */ 1351 mutex_enter(&xnfp->xnf_txlock); 1352 tx_ring_space = xnf_clean_tx_ring(xnfp); 1353 mutex_exit(&xnfp->xnf_txlock); 1354 if (tx_ring_space > XNF_TX_FREE_THRESH) { 1355 mutex_exit(&xnfp->xnf_intrlock); 1356 mac_tx_update(xnfp->xnf_mh); 1357 mutex_enter(&xnfp->xnf_intrlock); 1358 } 1359 1360 xnfp->xnf_stat_interrupts++; 1361 mutex_exit(&xnfp->xnf_intrlock); 1362 return (DDI_INTR_CLAIMED); /* indicate that the interrupt was for us */ 1363 } 1364 1365 /* 1366 * xnf_start() -- start the board receiving and enable interrupts. 1367 */ 1368 static int 1369 xnf_start(void *arg) 1370 { 1371 xnf_t *xnfp = arg; 1372 1373 #ifdef XNF_DEBUG 1374 if (xnfdebug & XNF_DEBUG_TRACE) 1375 printf("xnf%d start(0x%p)\n", 1376 ddi_get_instance(xnfp->xnf_devinfo), (void *)xnfp); 1377 #endif 1378 1379 mutex_enter(&xnfp->xnf_intrlock); 1380 mutex_enter(&xnfp->xnf_txlock); 1381 1382 /* Accept packets from above. 
*/ 1383 xnfp->xnf_running = B_TRUE; 1384 1385 mutex_exit(&xnfp->xnf_txlock); 1386 mutex_exit(&xnfp->xnf_intrlock); 1387 1388 return (0); 1389 } 1390 1391 /* xnf_stop() - disable hardware */ 1392 static void 1393 xnf_stop(void *arg) 1394 { 1395 xnf_t *xnfp = arg; 1396 1397 #ifdef XNF_DEBUG 1398 if (xnfdebug & XNF_DEBUG_TRACE) 1399 printf("xnf%d stop(0x%p)\n", 1400 ddi_get_instance(xnfp->xnf_devinfo), (void *)xnfp); 1401 #endif 1402 1403 mutex_enter(&xnfp->xnf_intrlock); 1404 mutex_enter(&xnfp->xnf_txlock); 1405 1406 xnfp->xnf_running = B_FALSE; 1407 1408 mutex_exit(&xnfp->xnf_txlock); 1409 mutex_exit(&xnfp->xnf_intrlock); 1410 } 1411 1412 /* 1413 * Driver private functions follow 1414 */ 1415 1416 /* 1417 * Hang buffer on rx ring 1418 */ 1419 static void 1420 rx_buffer_hang(xnf_t *xnfp, struct xnf_buffer_desc *bdesc) 1421 { 1422 volatile netif_rx_request_t *reqp; 1423 RING_IDX hang_ix; 1424 grant_ref_t ref; 1425 domid_t oeid; 1426 1427 oeid = xvdi_get_oeid(xnfp->xnf_devinfo); 1428 1429 ASSERT(MUTEX_HELD(&xnfp->xnf_intrlock)); 1430 reqp = RING_GET_REQUEST(&xnfp->xnf_rx_ring, 1431 xnfp->xnf_rx_ring.req_prod_pvt); 1432 hang_ix = (RING_IDX) (reqp - RING_GET_REQUEST(&xnfp->xnf_rx_ring, 0)); 1433 ASSERT(xnfp->xnf_rxpkt_bufptr[hang_ix] == NULL); 1434 if (bdesc->grant_ref == GRANT_INVALID_REF) { 1435 ref = gnttab_claim_grant_reference(&xnfp->xnf_gref_rx_head); 1436 ASSERT((signed short)ref >= 0); 1437 bdesc->grant_ref = ref; 1438 if (xnfp->xnf_rx_hvcopy) { 1439 pfn_t pfn = xnf_btop(bdesc->buf_phys); 1440 mfn_t mfn = pfn_to_mfn(pfn); 1441 1442 gnttab_grant_foreign_access_ref(ref, oeid, mfn, 0); 1443 } else { 1444 gnttab_grant_foreign_transfer_ref(ref, oeid, 0); 1445 } 1446 } 1447 reqp->id = hang_ix; 1448 reqp->gref = bdesc->grant_ref; 1449 bdesc->id = hang_ix; 1450 xnfp->xnf_rxpkt_bufptr[hang_ix] = bdesc; 1451 membar_producer(); 1452 xnfp->xnf_rx_ring.req_prod_pvt++; 1453 } 1454 1455 static mblk_t * 1456 xnf_process_hvcopy_recv(xnf_t *xnfp) 1457 { 1458 netif_rx_response_t *rxpkt; 1459 mblk_t *mp, *head, *tail; 1460 struct xnf_buffer_desc *bdesc; 1461 boolean_t hwcsum = B_FALSE, notify, work_to_do; 1462 size_t len; 1463 1464 /* 1465 * in loop over unconsumed responses, we do: 1466 * 1. get a response 1467 * 2. take corresponding buffer off recv. ring 1468 * 3. indicate this by setting slot to NULL 1469 * 4. create a new message and 1470 * 5. copy data in, adjust ptr 1471 * 1472 * outside loop: 1473 * 7. make sure no more data has arrived; kick HV 1474 */ 1475 1476 head = tail = NULL; 1477 1478 loop: 1479 while (RING_HAS_UNCONSUMED_RESPONSES(&xnfp->xnf_rx_ring)) { 1480 1481 /* 1. */ 1482 rxpkt = RING_GET_RESPONSE(&xnfp->xnf_rx_ring, 1483 xnfp->xnf_rx_ring.rsp_cons); 1484 1485 DTRACE_PROBE4(got_PKT, int, (int)rxpkt->id, int, 1486 (int)rxpkt->offset, 1487 int, (int)rxpkt->flags, int, (int)rxpkt->status); 1488 1489 /* 1490 * 2. 
1491 * Take buffer off of receive ring 1492 */ 1493 hwcsum = B_FALSE; 1494 bdesc = xnfp->xnf_rxpkt_bufptr[rxpkt->id]; 1495 /* 3 */ 1496 xnfp->xnf_rxpkt_bufptr[rxpkt->id] = NULL; 1497 ASSERT(bdesc->id == rxpkt->id); 1498 if (rxpkt->status <= 0) { 1499 DTRACE_PROBE4(pkt_status_negative, int, rxpkt->status, 1500 char *, bdesc->buf, int, rxpkt->offset, 1501 char *, ((char *)bdesc->buf) + rxpkt->offset); 1502 mp = NULL; 1503 xnfp->xnf_stat_errrx++; 1504 if (rxpkt->status == 0) 1505 xnfp->xnf_stat_runt++; 1506 if (rxpkt->status == NETIF_RSP_ERROR) 1507 xnfp->xnf_stat_mac_rcv_error++; 1508 if (rxpkt->status == NETIF_RSP_DROPPED) 1509 xnfp->xnf_stat_norxbuf++; 1510 /* 1511 * re-hang the buffer 1512 */ 1513 rx_buffer_hang(xnfp, bdesc); 1514 } else { 1515 grant_ref_t ref = bdesc->grant_ref; 1516 struct xnf_buffer_desc *new_bdesc; 1517 unsigned long off = rxpkt->offset; 1518 1519 DTRACE_PROBE4(pkt_status_ok, int, rxpkt->status, 1520 char *, bdesc->buf, int, rxpkt->offset, 1521 char *, ((char *)bdesc->buf) + rxpkt->offset); 1522 len = rxpkt->status; 1523 ASSERT(off + len <= PAGEOFFSET); 1524 if (ref == GRANT_INVALID_REF) { 1525 mp = NULL; 1526 new_bdesc = bdesc; 1527 cmn_err(CE_WARN, "Bad rx grant reference %d " 1528 "from dom %d", ref, 1529 xvdi_get_oeid(xnfp->xnf_devinfo)); 1530 goto luckless; 1531 } 1532 /* 1533 * Release ref which we'll be re-claiming in 1534 * rx_buffer_hang(). 1535 */ 1536 bdesc->grant_ref = GRANT_INVALID_REF; 1537 (void) gnttab_end_foreign_access_ref(ref, 0); 1538 gnttab_release_grant_reference(&xnfp->xnf_gref_rx_head, 1539 ref); 1540 if (rxpkt->flags & NETRXF_data_validated) 1541 hwcsum = B_TRUE; 1542 1543 /* 1544 * XXPV for the initial implementation of HVcopy, 1545 * create a new msg and copy in the data 1546 */ 1547 /* 4. */ 1548 if ((mp = allocb(len, BPRI_MED)) == NULL) { 1549 /* 1550 * Couldn't get buffer to copy to, 1551 * drop this data, and re-hang 1552 * the buffer on the ring. 1553 */ 1554 xnfp->xnf_stat_norxbuf++; 1555 DTRACE_PROBE(alloc_nix); 1556 } else { 1557 /* 5. */ 1558 DTRACE_PROBE(alloc_ok); 1559 bcopy(bdesc->buf + off, mp->b_wptr, 1560 len); 1561 mp->b_wptr += len; 1562 } 1563 new_bdesc = bdesc; 1564 luckless: 1565 1566 /* Re-hang old or hang new buffer. */ 1567 rx_buffer_hang(xnfp, new_bdesc); 1568 } 1569 if (mp) { 1570 if (hwcsum) { 1571 /* 1572 * See comments in xnf_process_recv(). 1573 */ 1574 1575 (void) hcksum_assoc(mp, NULL, 1576 NULL, 0, 0, 0, 0, 1577 HCK_FULLCKSUM | 1578 HCK_FULLCKSUM_OK, 1579 0); 1580 xnfp->xnf_stat_rx_cksum_no_need++; 1581 } 1582 if (head == NULL) { 1583 head = tail = mp; 1584 } else { 1585 tail->b_next = mp; 1586 tail = mp; 1587 } 1588 1589 ASSERT(mp->b_next == NULL); 1590 1591 xnfp->xnf_stat_ipackets++; 1592 xnfp->xnf_stat_rbytes += len; 1593 } 1594 1595 xnfp->xnf_rx_ring.rsp_cons++; 1596 1597 xnfp->xnf_stat_hvcopy_packet_processed++; 1598 } 1599 1600 /* 7. */ 1601 /* 1602 * Has more data come in since we started? 1603 */ 1604 /* LINTED: constant in conditional context */ 1605 RING_FINAL_CHECK_FOR_RESPONSES(&xnfp->xnf_rx_ring, work_to_do); 1606 if (work_to_do) 1607 goto loop; 1608 1609 /* 1610 * Indicate to the backend that we have re-filled the receive 1611 * ring. 
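 *
 * (RING_PUSH_REQUESTS_AND_CHECK_NOTIFY() sets notify only when the
 * backend has indicated, via the event index in the shared ring,
 * that it is waiting for more requests; this avoids kicking the
 * event channel on every refill.)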
1612 */ 1613 /* LINTED: constant in conditional context */ 1614 RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&xnfp->xnf_rx_ring, notify); 1615 if (notify) 1616 ec_notify_via_evtchn(xnfp->xnf_evtchn); 1617 1618 return (head); 1619 } 1620 1621 /* Process all queued received packets */ 1622 static mblk_t * 1623 xnf_process_recv(xnf_t *xnfp) 1624 { 1625 volatile netif_rx_response_t *rxpkt; 1626 mblk_t *mp, *head, *tail; 1627 struct xnf_buffer_desc *bdesc; 1628 extern mblk_t *desballoc(unsigned char *, size_t, uint_t, frtn_t *); 1629 boolean_t hwcsum = B_FALSE, notify, work_to_do; 1630 size_t len; 1631 pfn_t pfn; 1632 long cnt; 1633 1634 head = tail = NULL; 1635 loop: 1636 while (RING_HAS_UNCONSUMED_RESPONSES(&xnfp->xnf_rx_ring)) { 1637 1638 rxpkt = RING_GET_RESPONSE(&xnfp->xnf_rx_ring, 1639 xnfp->xnf_rx_ring.rsp_cons); 1640 1641 /* 1642 * Take buffer off of receive ring 1643 */ 1644 hwcsum = B_FALSE; 1645 bdesc = xnfp->xnf_rxpkt_bufptr[rxpkt->id]; 1646 xnfp->xnf_rxpkt_bufptr[rxpkt->id] = NULL; 1647 ASSERT(bdesc->id == rxpkt->id); 1648 if (rxpkt->status <= 0) { 1649 mp = NULL; 1650 xnfp->xnf_stat_errrx++; 1651 if (rxpkt->status == 0) 1652 xnfp->xnf_stat_runt++; 1653 if (rxpkt->status == NETIF_RSP_ERROR) 1654 xnfp->xnf_stat_mac_rcv_error++; 1655 if (rxpkt->status == NETIF_RSP_DROPPED) 1656 xnfp->xnf_stat_norxbuf++; 1657 /* 1658 * re-hang the buffer 1659 */ 1660 rx_buffer_hang(xnfp, bdesc); 1661 } else { 1662 grant_ref_t ref = bdesc->grant_ref; 1663 struct xnf_buffer_desc *new_bdesc; 1664 unsigned long off = rxpkt->offset; 1665 unsigned long mfn; 1666 1667 len = rxpkt->status; 1668 ASSERT(off + len <= PAGEOFFSET); 1669 if (ref == GRANT_INVALID_REF) { 1670 mp = NULL; 1671 new_bdesc = bdesc; 1672 cmn_err(CE_WARN, "Bad rx grant reference %d " 1673 "from dom %d", ref, 1674 xvdi_get_oeid(xnfp->xnf_devinfo)); 1675 goto luckless; 1676 } 1677 bdesc->grant_ref = GRANT_INVALID_REF; 1678 mfn = gnttab_end_foreign_transfer_ref(ref); 1679 ASSERT(mfn != MFN_INVALID); 1680 ASSERT(hat_getpfnum(kas.a_hat, bdesc->buf) == 1681 PFN_INVALID); 1682 1683 gnttab_release_grant_reference(&xnfp->xnf_gref_rx_head, 1684 ref); 1685 reassign_pfn(xnf_btop(bdesc->buf_phys), mfn); 1686 hat_devload(kas.a_hat, bdesc->buf, PAGESIZE, 1687 xnf_btop(bdesc->buf_phys), 1688 PROT_READ | PROT_WRITE, HAT_LOAD); 1689 balloon_drv_added(1); 1690 1691 if (rxpkt->flags & NETRXF_data_validated) 1692 hwcsum = B_TRUE; 1693 if (len <= xnf_rx_bcopy_thresh) { 1694 /* 1695 * For small buffers, just copy the data 1696 * and send the copy upstream. 1697 */ 1698 new_bdesc = NULL; 1699 } else { 1700 /* 1701 * We send a pointer to this data upstream; 1702 * we need a new buffer to replace this one. 1703 */ 1704 mutex_enter(&xnfp->xnf_rx_buf_mutex); 1705 new_bdesc = xnf_get_buffer(xnfp); 1706 if (new_bdesc != NULL) { 1707 xnfp->xnf_rx_bufs_outstanding++; 1708 } else { 1709 xnfp->xnf_stat_rx_no_ringbuf++; 1710 } 1711 mutex_exit(&xnfp->xnf_rx_buf_mutex); 1712 } 1713 1714 if (new_bdesc == NULL) { 1715 /* 1716 * Don't have a new ring buffer; bcopy the data 1717 * from the buffer, and preserve the 1718 * original buffer 1719 */ 1720 if ((mp = allocb(len, BPRI_MED)) == NULL) { 1721 /* 1722 * Could't get buffer to copy to, 1723 * drop this data, and re-hang 1724 * the buffer on the ring. 
1725 */ 1726 xnfp->xnf_stat_norxbuf++; 1727 } else { 1728 bcopy(bdesc->buf + off, mp->b_wptr, 1729 len); 1730 } 1731 /* 1732 * Give the buffer page back to xen 1733 */ 1734 pfn = xnf_btop(bdesc->buf_phys); 1735 cnt = balloon_free_pages(1, &mfn, bdesc->buf, 1736 &pfn); 1737 if (cnt != 1) { 1738 cmn_err(CE_WARN, "unable to give a " 1739 "page back to the hypervisor\n"); 1740 } 1741 new_bdesc = bdesc; 1742 } else { 1743 if ((mp = desballoc((unsigned char *)bdesc->buf, 1744 off + len, 0, (frtn_t *)bdesc)) == NULL) { 1745 /* 1746 * Couldn't get mblk to pass recv data 1747 * up with, free the old ring buffer 1748 */ 1749 xnfp->xnf_stat_norxbuf++; 1750 xnf_rcv_complete(bdesc); 1751 goto luckless; 1752 } 1753 (void) ddi_dma_sync(bdesc->dma_handle, 1754 0, 0, DDI_DMA_SYNC_FORCPU); 1755 1756 mp->b_wptr += off; 1757 mp->b_rptr += off; 1758 } 1759 luckless: 1760 if (mp) 1761 mp->b_wptr += len; 1762 /* re-hang old or hang new buffer */ 1763 rx_buffer_hang(xnfp, new_bdesc); 1764 } 1765 if (mp) { 1766 if (hwcsum) { 1767 /* 1768 * If the peer says that the data has 1769 * been validated then we declare that 1770 * the full checksum has been 1771 * verified. 1772 * 1773 * We don't look at the "checksum 1774 * blank" flag, and hence could have a 1775 * packet here that we are asserting 1776 * is good with a blank checksum. 1777 * 1778 * The hardware checksum offload 1779 * specification says that we must 1780 * provide the actual checksum as well 1781 * as an assertion that it is valid, 1782 * but the protocol stack doesn't 1783 * actually use it and some other 1784 * drivers don't bother, so we don't. 1785 * If it was necessary we could grovel 1786 * in the packet to find it. 1787 */ 1788 1789 (void) hcksum_assoc(mp, NULL, 1790 NULL, 0, 0, 0, 0, 1791 HCK_FULLCKSUM | 1792 HCK_FULLCKSUM_OK, 1793 0); 1794 xnfp->xnf_stat_rx_cksum_no_need++; 1795 } 1796 if (head == NULL) { 1797 head = tail = mp; 1798 } else { 1799 tail->b_next = mp; 1800 tail = mp; 1801 } 1802 1803 ASSERT(mp->b_next == NULL); 1804 1805 xnfp->xnf_stat_ipackets++; 1806 xnfp->xnf_stat_rbytes += len; 1807 } 1808 1809 xnfp->xnf_rx_ring.rsp_cons++; 1810 } 1811 1812 /* 1813 * Has more data come in since we started? 1814 */ 1815 /* LINTED: constant in conditional context */ 1816 RING_FINAL_CHECK_FOR_RESPONSES(&xnfp->xnf_rx_ring, work_to_do); 1817 if (work_to_do) 1818 goto loop; 1819 1820 /* 1821 * Indicate to the backend that we have re-filled the receive 1822 * ring. 1823 */ 1824 /* LINTED: constant in conditional context */ 1825 RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&xnfp->xnf_rx_ring, notify); 1826 if (notify) 1827 ec_notify_via_evtchn(xnfp->xnf_evtchn); 1828 1829 return (head); 1830 } 1831 1832 /* Called when the upper layers free a message we passed upstream */ 1833 static void 1834 xnf_rcv_complete(struct xnf_buffer_desc *bdesc) 1835 { 1836 xnf_t *xnfp = bdesc->xnfp; 1837 pfn_t pfn; 1838 long cnt; 1839 1840 /* One less outstanding receive buffer */ 1841 mutex_enter(&xnfp->xnf_rx_buf_mutex); 1842 --xnfp->xnf_rx_bufs_outstanding; 1843 /* 1844 * Return buffer to the free list, unless the free list is getting 1845 * too large. XXPV - this threshold may need tuning. 1846 */ 1847 if (xnfp->xnf_rx_descs_free < xnf_rx_bufs_lowat) { 1848 /* 1849 * Unmap the page, and hand the machine page back 1850 * to xen so it can be re-used as a backend net buffer. 
1851 */ 1852 pfn = xnf_btop(bdesc->buf_phys); 1853 cnt = balloon_free_pages(1, NULL, bdesc->buf, &pfn); 1854 if (cnt != 1) { 1855 cmn_err(CE_WARN, "unable to give a page back to the " 1856 "hypervisor\n"); 1857 } 1858 1859 bdesc->next = xnfp->xnf_free_list; 1860 xnfp->xnf_free_list = bdesc; 1861 xnfp->xnf_rx_descs_free++; 1862 mutex_exit(&xnfp->xnf_rx_buf_mutex); 1863 } else { 1864 /* 1865 * We can return everything here since we have a free buffer 1866 * that we have not given the backing page for back to xen. 1867 */ 1868 --xnfp->xnf_rx_buffer_count; 1869 mutex_exit(&xnfp->xnf_rx_buf_mutex); 1870 (void) ddi_dma_unbind_handle(bdesc->dma_handle); 1871 ddi_dma_mem_free(&bdesc->acc_handle); 1872 ddi_dma_free_handle(&bdesc->dma_handle); 1873 kmem_free(bdesc, sizeof (*bdesc)); 1874 } 1875 } 1876 1877 /* 1878 * xnf_alloc_dma_resources() -- initialize the drivers structures 1879 */ 1880 static int 1881 xnf_alloc_dma_resources(xnf_t *xnfp) 1882 { 1883 dev_info_t *devinfo = xnfp->xnf_devinfo; 1884 int i; 1885 size_t len; 1886 ddi_dma_cookie_t dma_cookie; 1887 uint_t ncookies; 1888 struct xnf_buffer_desc *bdesc; 1889 int rc; 1890 caddr_t rptr; 1891 1892 xnfp->xnf_n_rx = NET_RX_RING_SIZE; 1893 xnfp->xnf_max_rx_bufs = xnf_rx_bufs_hiwat; 1894 1895 xnfp->xnf_n_tx = NET_TX_RING_SIZE; 1896 1897 /* 1898 * The code below allocates all the DMA data structures that 1899 * need to be released when the driver is detached. 1900 * 1901 * First allocate handles for mapping (virtual address) pointers to 1902 * transmit data buffers to physical addresses 1903 */ 1904 for (i = 0; i < xnfp->xnf_n_tx; i++) { 1905 if ((rc = ddi_dma_alloc_handle(devinfo, 1906 &tx_buffer_dma_attr, DDI_DMA_SLEEP, 0, 1907 &xnfp->xnf_tx_pkt_info[i].dma_handle)) != DDI_SUCCESS) 1908 return (DDI_FAILURE); 1909 } 1910 1911 /* 1912 * Allocate page for the transmit descriptor ring. 1913 */ 1914 if (ddi_dma_alloc_handle(devinfo, &ringbuf_dma_attr, 1915 DDI_DMA_SLEEP, 0, &xnfp->xnf_tx_ring_dma_handle) != DDI_SUCCESS) 1916 goto alloc_error; 1917 1918 if (ddi_dma_mem_alloc(xnfp->xnf_tx_ring_dma_handle, 1919 PAGESIZE, &accattr, DDI_DMA_CONSISTENT, 1920 DDI_DMA_SLEEP, 0, &rptr, &len, 1921 &xnfp->xnf_tx_ring_dma_acchandle) != DDI_SUCCESS) { 1922 ddi_dma_free_handle(&xnfp->xnf_tx_ring_dma_handle); 1923 xnfp->xnf_tx_ring_dma_handle = NULL; 1924 goto alloc_error; 1925 } 1926 1927 if ((rc = ddi_dma_addr_bind_handle(xnfp->xnf_tx_ring_dma_handle, NULL, 1928 rptr, PAGESIZE, DDI_DMA_RDWR | DDI_DMA_CONSISTENT, 1929 DDI_DMA_SLEEP, 0, &dma_cookie, &ncookies)) != DDI_DMA_MAPPED) { 1930 ddi_dma_mem_free(&xnfp->xnf_tx_ring_dma_acchandle); 1931 ddi_dma_free_handle(&xnfp->xnf_tx_ring_dma_handle); 1932 xnfp->xnf_tx_ring_dma_handle = NULL; 1933 xnfp->xnf_tx_ring_dma_acchandle = NULL; 1934 if (rc == DDI_DMA_NORESOURCES) 1935 goto alloc_error; 1936 else 1937 goto error; 1938 } 1939 1940 ASSERT(ncookies == 1); 1941 bzero(rptr, PAGESIZE); 1942 /* LINTED: constant in conditional context */ 1943 SHARED_RING_INIT((netif_tx_sring_t *)rptr); 1944 /* LINTED: constant in conditional context */ 1945 FRONT_RING_INIT(&xnfp->xnf_tx_ring, (netif_tx_sring_t *)rptr, PAGESIZE); 1946 xnfp->xnf_tx_ring_phys_addr = dma_cookie.dmac_laddress; 1947 1948 /* 1949 * Allocate page for the receive descriptor ring. 
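 *
 * (Same three-step DDI sequence as the transmit ring above:
 * ddi_dma_alloc_handle(), ddi_dma_mem_alloc() and
 * ddi_dma_addr_bind_handle(), followed by SHARED_RING_INIT() and
 * FRONT_RING_INIT() on the shared page.)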
1950 */ 1951 if (ddi_dma_alloc_handle(devinfo, &ringbuf_dma_attr, 1952 DDI_DMA_SLEEP, 0, &xnfp->xnf_rx_ring_dma_handle) != DDI_SUCCESS) 1953 goto alloc_error; 1954 1955 if (ddi_dma_mem_alloc(xnfp->xnf_rx_ring_dma_handle, 1956 PAGESIZE, &accattr, DDI_DMA_CONSISTENT, 1957 DDI_DMA_SLEEP, 0, &rptr, &len, 1958 &xnfp->xnf_rx_ring_dma_acchandle) != DDI_SUCCESS) { 1959 ddi_dma_free_handle(&xnfp->xnf_rx_ring_dma_handle); 1960 xnfp->xnf_rx_ring_dma_handle = NULL; 1961 goto alloc_error; 1962 } 1963 1964 if ((rc = ddi_dma_addr_bind_handle(xnfp->xnf_rx_ring_dma_handle, NULL, 1965 rptr, PAGESIZE, DDI_DMA_RDWR | DDI_DMA_CONSISTENT, 1966 DDI_DMA_SLEEP, 0, &dma_cookie, &ncookies)) != DDI_DMA_MAPPED) { 1967 ddi_dma_mem_free(&xnfp->xnf_rx_ring_dma_acchandle); 1968 ddi_dma_free_handle(&xnfp->xnf_rx_ring_dma_handle); 1969 xnfp->xnf_rx_ring_dma_handle = NULL; 1970 xnfp->xnf_rx_ring_dma_acchandle = NULL; 1971 if (rc == DDI_DMA_NORESOURCES) 1972 goto alloc_error; 1973 else 1974 goto error; 1975 } 1976 1977 ASSERT(ncookies == 1); 1978 bzero(rptr, PAGESIZE); 1979 /* LINTED: constant in conditional context */ 1980 SHARED_RING_INIT((netif_rx_sring_t *)rptr); 1981 /* LINTED: constant in conditional context */ 1982 FRONT_RING_INIT(&xnfp->xnf_rx_ring, (netif_rx_sring_t *)rptr, PAGESIZE); 1983 xnfp->xnf_rx_ring_phys_addr = dma_cookie.dmac_laddress; 1984 1985 /* 1986 * Preallocate receive buffers for each receive descriptor. 1987 */ 1988 1989 /* Set up the "free list" of receive buffer descriptors */ 1990 for (i = 0; i < xnfp->xnf_n_rx; i++) { 1991 if ((bdesc = xnf_alloc_buffer(xnfp)) == NULL) 1992 goto alloc_error; 1993 bdesc->next = xnfp->xnf_free_list; 1994 xnfp->xnf_free_list = bdesc; 1995 } 1996 1997 return (DDI_SUCCESS); 1998 1999 alloc_error: 2000 cmn_err(CE_WARN, "xnf%d: could not allocate enough DMA memory", 2001 ddi_get_instance(xnfp->xnf_devinfo)); 2002 error: 2003 xnf_release_dma_resources(xnfp); 2004 return (DDI_FAILURE); 2005 } 2006 2007 /* 2008 * Release all DMA resources in the opposite order from acquisition 2009 * Should not be called until all outstanding esballoc buffers 2010 * have been returned. 
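 *
 * (xnf_detach() enforces this by polling xnf_rx_bufs_outstanding at
 * 100ms intervals for up to five seconds before allowing the detach
 * to proceed.)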

/*
 * Release all DMA resources in the opposite order from acquisition.
 * Should not be called until all outstanding esballoc buffers
 * have been returned.
 */
static void
xnf_release_dma_resources(xnf_t *xnfp)
{
	int i;

	/*
	 * Free receive buffers which are currently associated with
	 * descriptors.
	 */
	for (i = 0; i < xnfp->xnf_n_rx; i++) {
		struct xnf_buffer_desc *bp;

		if ((bp = xnfp->xnf_rxpkt_bufptr[i]) == NULL)
			continue;
		xnf_free_buffer(bp);
		xnfp->xnf_rxpkt_bufptr[i] = NULL;
	}

	/* Free the receive ring buffer */
	if (xnfp->xnf_rx_ring_dma_acchandle != NULL) {
		(void) ddi_dma_unbind_handle(xnfp->xnf_rx_ring_dma_handle);
		ddi_dma_mem_free(&xnfp->xnf_rx_ring_dma_acchandle);
		ddi_dma_free_handle(&xnfp->xnf_rx_ring_dma_handle);
		xnfp->xnf_rx_ring_dma_acchandle = NULL;
	}
	/* Free the transmit ring buffer */
	if (xnfp->xnf_tx_ring_dma_acchandle != NULL) {
		(void) ddi_dma_unbind_handle(xnfp->xnf_tx_ring_dma_handle);
		ddi_dma_mem_free(&xnfp->xnf_tx_ring_dma_acchandle);
		ddi_dma_free_handle(&xnfp->xnf_tx_ring_dma_handle);
		xnfp->xnf_tx_ring_dma_acchandle = NULL;
	}
}

static void
xnf_release_mblks(xnf_t *xnfp)
{
	int i;

	for (i = 0; i < xnfp->xnf_n_tx; i++) {
		if (xnfp->xnf_tx_pkt_info[i].mp == NULL)
			continue;
		freemsg(xnfp->xnf_tx_pkt_info[i].mp);
		xnfp->xnf_tx_pkt_info[i].mp = NULL;
		(void) ddi_dma_unbind_handle(
		    xnfp->xnf_tx_pkt_info[i].dma_handle);
	}
}

/*
 * Remove a transmit buffer descriptor from the head of the free list and
 * return a pointer to it.  If there are no buffers on the list, attempt to
 * allocate a new one.  Called with the tx_buf_mutex held.
 */
static struct xnf_buffer_desc *
xnf_get_tx_buffer(xnf_t *xnfp)
{
	struct xnf_buffer_desc *bdesc;

	bdesc = xnfp->xnf_tx_free_list;
	if (bdesc != NULL) {
		xnfp->xnf_tx_free_list = bdesc->next;
	} else {
		bdesc = xnf_alloc_tx_buffer(xnfp);
	}
	return (bdesc);
}

/*
 * Remove a receive buffer descriptor from the head of the free list and
 * return a pointer to it.  If there are no buffers on the list, attempt to
 * allocate a new one.  Called with the rx_buf_mutex held.
 */
static struct xnf_buffer_desc *
xnf_get_buffer(xnf_t *xnfp)
{
	struct xnf_buffer_desc *bdesc;

	bdesc = xnfp->xnf_free_list;
	if (bdesc != NULL) {
		xnfp->xnf_free_list = bdesc->next;
		xnfp->xnf_rx_descs_free--;
	} else {
		bdesc = xnf_alloc_buffer(xnfp);
	}
	return (bdesc);
}

/*
 * Free a transmit buffer back to the transmit free list.
 */
static void
xnf_free_tx_buffer(struct xnf_buffer_desc *bp)
{
	xnf_t *xnfp = bp->xnfp;

	mutex_enter(&xnfp->xnf_tx_buf_mutex);
	bp->next = xnfp->xnf_tx_free_list;
	xnfp->xnf_tx_free_list = bp;
	mutex_exit(&xnfp->xnf_tx_buf_mutex);
}
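
/*
 * The buffer descriptor free lists above are simple LIFO lists
 * protected by their respective mutexes: the get routines expect the
 * corresponding mutex to be held by the caller, while
 * xnf_free_tx_buffer() takes the mutex itself.  A hypothetical caller
 * of the transmit-side helpers would therefore look like the sketch
 * below (illustration only, not a quote of the transmit path):
 *
 *	struct xnf_buffer_desc *bdesc;
 *
 *	mutex_enter(&xnfp->xnf_tx_buf_mutex);
 *	bdesc = xnf_get_tx_buffer(xnfp);
 *	mutex_exit(&xnfp->xnf_tx_buf_mutex);
 *
 *	if (bdesc == NULL)
 *		return;			(no resources; back off)
 *
 *	... copy packet data into bdesc->buf, post it to the ring ...
 *
 *	xnf_free_tx_buffer(bdesc);	(returns it to the free list)
 */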

/*
 * Put a buffer descriptor back onto the head of the free list.
 *
 * For page flip:
 *	We can't really free these buffers back to the kernel since we
 *	have given away their backing page to be used by the backend
 *	net driver.
 * For hvcopy:
 *	Release all of the memory.
 */
static void
xnf_free_buffer(struct xnf_buffer_desc *bdesc)
{
	xnf_t *xnfp = bdesc->xnfp;

	mutex_enter(&xnfp->xnf_rx_buf_mutex);
	if (xnfp->xnf_rx_hvcopy) {
		if (ddi_dma_unbind_handle(bdesc->dma_handle) != DDI_SUCCESS)
			goto out;
		ddi_dma_mem_free(&bdesc->acc_handle);
		ddi_dma_free_handle(&bdesc->dma_handle);
		kmem_free(bdesc, sizeof (*bdesc));
		xnfp->xnf_rx_buffer_count--;
	} else {
		bdesc->next = xnfp->xnf_free_list;
		xnfp->xnf_free_list = bdesc;
		xnfp->xnf_rx_descs_free++;
	}
out:
	mutex_exit(&xnfp->xnf_rx_buf_mutex);
}

/*
 * Allocate a DMA-able transmit buffer, including a structure to
 * keep track of the buffer.  Called with tx_buf_mutex held.
 */
static struct xnf_buffer_desc *
xnf_alloc_tx_buffer(xnf_t *xnfp)
{
	struct xnf_buffer_desc *bdesc;
	size_t len;

	if ((bdesc = kmem_zalloc(sizeof (*bdesc), KM_NOSLEEP)) == NULL)
		return (NULL);

	/* Allocate a DMA access handle for the transmit buffer */
	if (ddi_dma_alloc_handle(xnfp->xnf_devinfo, &tx_buffer_dma_attr,
	    0, 0, &bdesc->dma_handle) != DDI_SUCCESS)
		goto failure;

	/* Allocate DMA-able memory for the transmit buffer */
	if (ddi_dma_mem_alloc(bdesc->dma_handle,
	    PAGESIZE, &data_accattr, DDI_DMA_STREAMING, 0, 0,
	    &bdesc->buf, &len, &bdesc->acc_handle) != DDI_SUCCESS)
		goto failure_1;

	bdesc->xnfp = xnfp;
	xnfp->xnf_tx_buffer_count++;

	return (bdesc);

failure_1:
	ddi_dma_free_handle(&bdesc->dma_handle);

failure:
	kmem_free(bdesc, sizeof (*bdesc));
	return (NULL);
}
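
/*
 * xnf_alloc_tx_buffer() above and xnf_alloc_buffer() below both use
 * the common cleanup-label idiom: resources are acquired in order and
 * each failure label releases only what was acquired before the
 * failing step, in reverse order.  A minimal generic sketch of the
 * idiom (acquire_a()/acquire_b() and their release routines are
 * hypothetical, shown only for illustration):
 *
 *	if ((a = acquire_a()) == NULL)
 *		goto failure;
 *	if ((b = acquire_b()) == NULL)
 *		goto failure_1;
 *	return (success);
 *
 *	failure_1:
 *		release_a(a);
 *	failure:
 *		return (NULL);
 */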

/*
 * Allocate a DMA-able receive buffer, including a structure to
 * keep track of the buffer.  Called with rx_buf_mutex held.
 */
static struct xnf_buffer_desc *
xnf_alloc_buffer(xnf_t *xnfp)
{
	struct xnf_buffer_desc *bdesc;
	size_t len;
	uint_t ncookies;
	ddi_dma_cookie_t dma_cookie;
	long cnt;
	pfn_t pfn;

	if (xnfp->xnf_rx_buffer_count >= xnfp->xnf_max_rx_bufs)
		return (NULL);

	if ((bdesc = kmem_zalloc(sizeof (*bdesc), KM_NOSLEEP)) == NULL)
		return (NULL);

	/* allocate a DMA access handle for receive buffer */
	if (ddi_dma_alloc_handle(xnfp->xnf_devinfo, &rx_buffer_dma_attr,
	    0, 0, &bdesc->dma_handle) != DDI_SUCCESS)
		goto failure;

	/* Allocate DMA-able memory for receive buffer */
	if (ddi_dma_mem_alloc(bdesc->dma_handle,
	    PAGESIZE, &data_accattr, DDI_DMA_STREAMING, 0, 0,
	    &bdesc->buf, &len, &bdesc->acc_handle) != DDI_SUCCESS)
		goto failure_1;

	/* bind to virtual address of buffer to get physical address */
	if (ddi_dma_addr_bind_handle(bdesc->dma_handle, NULL,
	    bdesc->buf, PAGESIZE, DDI_DMA_READ | DDI_DMA_STREAMING,
	    DDI_DMA_SLEEP, 0, &dma_cookie, &ncookies) != DDI_DMA_MAPPED)
		goto failure_2;

	bdesc->buf_phys = dma_cookie.dmac_laddress;
	bdesc->xnfp = xnfp;
	if (xnfp->xnf_rx_hvcopy) {
		bdesc->free_rtn.free_func = xnf_copy_rcv_complete;
	} else {
		bdesc->free_rtn.free_func = xnf_rcv_complete;
	}
	bdesc->free_rtn.free_arg = (char *)bdesc;
	bdesc->grant_ref = GRANT_INVALID_REF;
	ASSERT(ncookies == 1);

	xnfp->xnf_rx_buffer_count++;

	if (!xnfp->xnf_rx_hvcopy) {
		/*
		 * Unmap the page, and hand the machine page back
		 * to xen so it can be used as a backend net buffer.
		 */
		pfn = xnf_btop(bdesc->buf_phys);
		cnt = balloon_free_pages(1, NULL, bdesc->buf, &pfn);
		if (cnt != 1) {
			cmn_err(CE_WARN, "unable to give a page back to the "
			    "hypervisor\n");
		}
	}

	return (bdesc);

failure_2:
	ddi_dma_mem_free(&bdesc->acc_handle);

failure_1:
	ddi_dma_free_handle(&bdesc->dma_handle);

failure:
	kmem_free(bdesc, sizeof (*bdesc));
	return (NULL);
}
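
/*
 * The free_rtn member initialised above is the frtn_t handed to
 * desballoc(9F) when the receive path loans bdesc->buf out to the
 * networking stack; when the upper layers eventually free the mblk,
 * the registered function runs and the descriptor comes back to the
 * driver.  A minimal sketch of how such a loaned-up mblk is built
 * (illustration only; the real receive path elsewhere in this file
 * also handles length checks, checksum flags and statistics):
 *
 *	mblk_t *mp;
 *
 *	mp = desballoc((unsigned char *)bdesc->buf, len, 0,
 *	    &bdesc->free_rtn);
 *	if (mp == NULL) {
 *		xnf_free_buffer(bdesc);		(give the buffer back)
 *	} else {
 *		mp->b_wptr += len;		(mark the valid data)
 *		... pass mp up to the MAC layer ...
 *	}
 */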

/*
 * Statistics.
 */
static char *xnf_aux_statistics[] = {
	"tx_cksum_deferred",
	"rx_cksum_no_need",
	"interrupts",
	"unclaimed_interrupts",
	"tx_pullup",
	"tx_pagebndry",
	"tx_attempt",
	"rx_no_ringbuf",
	"hvcopy_packet_processed",
};

static int
xnf_kstat_aux_update(kstat_t *ksp, int flag)
{
	xnf_t *xnfp;
	kstat_named_t *knp;

	if (flag != KSTAT_READ)
		return (EACCES);

	xnfp = ksp->ks_private;
	knp = ksp->ks_data;

	/*
	 * Assignment order must match that of the names in
	 * xnf_aux_statistics.
	 */
	(knp++)->value.ui64 = xnfp->xnf_stat_tx_cksum_deferred;
	(knp++)->value.ui64 = xnfp->xnf_stat_rx_cksum_no_need;

	(knp++)->value.ui64 = xnfp->xnf_stat_interrupts;
	(knp++)->value.ui64 = xnfp->xnf_stat_unclaimed_interrupts;
	(knp++)->value.ui64 = xnfp->xnf_stat_tx_pullup;
	(knp++)->value.ui64 = xnfp->xnf_stat_tx_pagebndry;
	(knp++)->value.ui64 = xnfp->xnf_stat_tx_attempt;
	(knp++)->value.ui64 = xnfp->xnf_stat_rx_no_ringbuf;

	(knp++)->value.ui64 = xnfp->xnf_stat_hvcopy_packet_processed;

	return (0);
}

static boolean_t
xnf_kstat_init(xnf_t *xnfp)
{
	int nstat = sizeof (xnf_aux_statistics) /
	    sizeof (xnf_aux_statistics[0]);
	char **cp = xnf_aux_statistics;
	kstat_named_t *knp;

	/*
	 * Create and initialise kstats.
	 */
	if ((xnfp->xnf_kstat_aux = kstat_create("xnf",
	    ddi_get_instance(xnfp->xnf_devinfo),
	    "aux_statistics", "net", KSTAT_TYPE_NAMED,
	    nstat, 0)) == NULL)
		return (B_FALSE);

	xnfp->xnf_kstat_aux->ks_private = xnfp;
	xnfp->xnf_kstat_aux->ks_update = xnf_kstat_aux_update;

	knp = xnfp->xnf_kstat_aux->ks_data;
	while (nstat > 0) {
		kstat_named_init(knp, *cp, KSTAT_DATA_UINT64);

		knp++;
		cp++;
		nstat--;
	}

	kstat_install(xnfp->xnf_kstat_aux);

	return (B_TRUE);
}

static int
xnf_stat(void *arg, uint_t stat, uint64_t *val)
{
	xnf_t *xnfp = arg;

	mutex_enter(&xnfp->xnf_intrlock);
	mutex_enter(&xnfp->xnf_txlock);

#define	mac_stat(q, r)				\
	case (MAC_STAT_##q):			\
		*val = xnfp->xnf_stat_##r;	\
		break

#define	ether_stat(q, r)			\
	case (ETHER_STAT_##q):			\
		*val = xnfp->xnf_stat_##r;	\
		break

	switch (stat) {

	mac_stat(IPACKETS, ipackets);
	mac_stat(OPACKETS, opackets);
	mac_stat(RBYTES, rbytes);
	mac_stat(OBYTES, obytes);
	mac_stat(NORCVBUF, norxbuf);
	mac_stat(IERRORS, errrx);
	mac_stat(NOXMTBUF, tx_defer);

	ether_stat(MACRCV_ERRORS, mac_rcv_error);
	ether_stat(TOOSHORT_ERRORS, runt);

	default:
		mutex_exit(&xnfp->xnf_txlock);
		mutex_exit(&xnfp->xnf_intrlock);

		return (ENOTSUP);
	}

#undef mac_stat
#undef ether_stat

	mutex_exit(&xnfp->xnf_txlock);
	mutex_exit(&xnfp->xnf_intrlock);

	return (0);
}
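
/*
 * The mac_stat()/ether_stat() helper macros above simply generate the
 * repetitive switch cases.  For instance, mac_stat(IPACKETS, ipackets);
 * expands to:
 *
 *	case (MAC_STAT_IPACKETS):
 *		*val = xnfp->xnf_stat_ipackets;
 *		break;
 *
 * which keeps the mapping between each GLDv3 statistic and its backing
 * xnf_stat_* field easy to read.
 */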

/*ARGSUSED*/
static void
xnf_blank(void *arg, time_t ticks, uint_t count)
{
	/*
	 * XXPV dme: blanking is not currently implemented.
	 *
	 * It's not obvious how to use the 'ticks' argument here.
	 *
	 * 'Count' might be used as an indicator of how to set
	 * rsp_event when posting receive buffers to the rx_ring.  It
	 * would replace the code at the tail of xnf_process_recv()
	 * that simply indicates that the next completed packet should
	 * cause an interrupt.
	 */
}

static void
xnf_resources(void *arg)
{
	xnf_t *xnfp = arg;
	mac_rx_fifo_t mrf;

	mrf.mrf_type = MAC_RX_FIFO;
	mrf.mrf_blank = xnf_blank;
	mrf.mrf_arg = (void *)xnfp;
	mrf.mrf_normal_blank_time = 128;	/* XXPV dme: see xnf_blank() */
	mrf.mrf_normal_pkt_count = 8;		/* XXPV dme: see xnf_blank() */

	xnfp->xnf_rx_handle = mac_resource_add(xnfp->xnf_mh,
	    (mac_resource_t *)&mrf);
}

/*ARGSUSED*/
static void
xnf_ioctl(void *arg, queue_t *q, mblk_t *mp)
{
	miocnak(q, mp, 0, EINVAL);
}

static boolean_t
xnf_getcapab(void *arg, mac_capab_t cap, void *cap_data)
{
	xnf_t *xnfp = arg;

	switch (cap) {
	case MAC_CAPAB_HCKSUM: {
		uint32_t *capab = cap_data;

		/*
		 * We declare ourselves capable of HCKSUM_INET_PARTIAL
		 * so that the protocol stack inserts the pseudo-header
		 * checksum in packets that it passes down to us.
		 *
		 * Whilst the flag used to communicate with dom0 is
		 * called "NETTXF_csum_blank", the checksum in the
		 * packet must contain the pseudo-header checksum and
		 * not zero.  (In fact, a Solaris dom0 is happy to deal
		 * with a checksum of zero, but a Linux dom0 is not.)
		 */
		if (xnfp->xnf_cksum_offload)
			*capab = HCKSUM_INET_PARTIAL;
		else
			*capab = 0;
		break;
	}

	case MAC_CAPAB_POLL:
		/* Just return B_TRUE. */
		break;

	default:
		return (B_FALSE);
	}

	return (B_TRUE);
}

/*ARGSUSED*/
static void
oe_state_change(dev_info_t *dip, ddi_eventcookie_t id,
    void *arg, void *impl_data)
{
	xnf_t *xnfp = ddi_get_driver_private(dip);
	XenbusState new_state = *(XenbusState *)impl_data;

	ASSERT(xnfp != NULL);

	switch (new_state) {
	case XenbusStateConnected:
		mutex_enter(&xnfp->xnf_intrlock);
		mutex_enter(&xnfp->xnf_txlock);

		xnfp->xnf_connected = B_TRUE;
		cv_broadcast(&xnfp->xnf_cv);

		mutex_exit(&xnfp->xnf_txlock);
		mutex_exit(&xnfp->xnf_intrlock);

		ec_notify_via_evtchn(xnfp->xnf_evtchn);
		break;

	default:
		break;
	}
}

/*
 * Check whether the backend is capable of, and willing to, talk to us
 * via hypervisor copy, as opposed to page flip.
 */
static boolean_t
xnf_hvcopy_peer_status(dev_info_t *devinfo)
{
	int be_rx_copy;
	int err;

	err = xenbus_scanf(XBT_NULL, xvdi_get_oename(devinfo),
	    "feature-rx-copy", "%d", &be_rx_copy);
	/*
	 * If we fail to read the store we assume that the key is
	 * absent, implying an older domain at the far end.  Older
	 * domains cannot do HV copy (we assume).
	 */
	if (err != 0)
		be_rx_copy = 0;

	return (be_rx_copy ? B_TRUE : B_FALSE);
}
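
/*
 * xnf_hvcopy_peer_status() reads the backend's half of the hvcopy
 * negotiation from xenstore; the frontend advertises its own choice by
 * writing a key under its own xenstore directory.  The sketch below
 * shows roughly what such a write looks like; the "request-rx-copy"
 * node name and the use of xenbus_printf() here are illustrative
 * assumptions, not a quote of the code elsewhere in this driver.
 *
 *	int err;
 *
 *	err = xenbus_printf(XBT_NULL, xvdi_get_xsname(devinfo),
 *	    "request-rx-copy", "%d", xnfp->xnf_rx_hvcopy ? 1 : 0);
 *	if (err != 0)
 *		cmn_err(CE_WARN, "xnf: failed to write request-rx-copy");
 */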