1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 /* 30 * 31 * Copyright (c) 2004 Christian Limpach. 32 * All rights reserved. 33 * 34 * Redistribution and use in source and binary forms, with or without 35 * modification, are permitted provided that the following conditions 36 * are met: 37 * 1. Redistributions of source code must retain the above copyright 38 * notice, this list of conditions and the following disclaimer. 39 * 2. Redistributions in binary form must reproduce the above copyright 40 * notice, this list of conditions and the following disclaimer in the 41 * documentation and/or other materials provided with the distribution. 42 * 3. This section intentionally left blank. 43 * 4. The name of the author may not be used to endorse or promote products 44 * derived from this software without specific prior written permission. 45 * 46 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 47 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 48 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 49 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 50 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 51 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 52 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 53 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 54 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 55 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 56 */ 57 /* 58 * Section 3 of the above license was updated in response to bug 6379571. 
59 */ 60 61 /* 62 * xnf.c - Nemo-based network driver for domU 63 */ 64 65 #include <sys/types.h> 66 #include <sys/hypervisor.h> 67 #include <sys/debug.h> 68 #include <sys/errno.h> 69 #include <sys/param.h> 70 #include <sys/sysmacros.h> 71 #include <sys/systm.h> 72 #include <sys/stropts.h> 73 #include <sys/stream.h> 74 #include <sys/strsubr.h> 75 #include <sys/kmem.h> 76 #include <sys/conf.h> 77 #include <sys/ddi.h> 78 #include <sys/devops.h> 79 #include <sys/sunddi.h> 80 #include <sys/sunndi.h> 81 #include <sys/ksynch.h> 82 #include <sys/dlpi.h> 83 #include <sys/ethernet.h> 84 #include <sys/strsun.h> 85 #include <sys/pattr.h> 86 #include <inet/common.h> 87 #include <inet/ip.h> 88 #include <sys/stat.h> 89 #include <sys/modctl.h> 90 #include <sys/mac.h> 91 #include <sys/mac_ether.h> 92 #include <sys/atomic.h> 93 #include <sys/errno.h> 94 #include <sys/machsystm.h> 95 #include <sys/bootconf.h> 96 #include <sys/bootsvcs.h> 97 #include <sys/bootinfo.h> 98 #include <sys/promif.h> 99 #include <sys/archsystm.h> 100 #include <sys/gnttab.h> 101 #include <sys/mach_mmu.h> 102 #include <xen/public/memory.h> 103 104 #include "xnf.h" 105 106 #include <sys/evtchn_impl.h> 107 #include <sys/balloon_impl.h> 108 #include <xen/sys/xendev.h> 109 110 /* 111 * Declarations and Module Linkage 112 */ 113 114 #define IDENT "Virtual Ethernet driver" 115 116 #if defined(DEBUG) || defined(__lint) 117 #define XNF_DEBUG 118 int xnfdebug = 0; 119 #endif 120 121 /* 122 * On a 32 bit PAE system physical and machine addresses are larger 123 * than 32 bits. ddi_btop() on such systems take an unsigned long 124 * argument, and so addresses above 4G are truncated before ddi_btop() 125 * gets to see them. To avoid this, code the shift operation here. 126 */ 127 #define xnf_btop(addr) ((addr) >> PAGESHIFT) 128 129 boolean_t xnf_cksum_offload = B_TRUE; 130 /* 131 * Should pages used for transmit be readonly for the peer? 132 */ 133 boolean_t xnf_tx_pages_readonly = B_FALSE; 134 /* 135 * Packets under this size are bcopied instead of using desballoc. 136 * Choose a value > XNF_FRAMESIZE (1514) to force the receive path to 137 * always copy. 
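 *
 * For illustration: with the default threshold of 64 a 60-byte ARP
 * frame is bcopied into a freshly allocated mblk and the ring buffer
 * is re-hung immediately, whereas a 1500-byte TCP segment is loaned
 * upstream via desballoc() and only recovered when the stack frees the
 * mblk. Since this is a patchable global, copy-always behaviour can be
 * selected at boot with an /etc/system line such as
 *   set xnf:xnf_rx_bcopy_thresh = 1515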
138 */ 139 unsigned int xnf_rx_bcopy_thresh = 64; 140 141 unsigned int xnf_max_tx_frags = 1; 142 143 /* Required system entry points */ 144 static int xnf_attach(dev_info_t *, ddi_attach_cmd_t); 145 static int xnf_detach(dev_info_t *, ddi_detach_cmd_t); 146 147 /* Required driver entry points for Nemo */ 148 static int xnf_start(void *); 149 static void xnf_stop(void *); 150 static int xnf_set_mac_addr(void *, const uint8_t *); 151 static int xnf_set_multicast(void *, boolean_t, const uint8_t *); 152 static int xnf_set_promiscuous(void *, boolean_t); 153 static mblk_t *xnf_send(void *, mblk_t *); 154 static uint_t xnf_intr(caddr_t); 155 static int xnf_stat(void *, uint_t, uint64_t *); 156 static void xnf_blank(void *, time_t, uint_t); 157 static void xnf_resources(void *); 158 static void xnf_ioctl(void *, queue_t *, mblk_t *); 159 static boolean_t xnf_getcapab(void *, mac_capab_t, void *); 160 161 /* Driver private functions */ 162 static int xnf_alloc_dma_resources(xnf_t *); 163 static void xnf_release_dma_resources(xnf_t *); 164 static mblk_t *xnf_process_recv(xnf_t *); 165 static void xnf_rcv_complete(struct xnf_buffer_desc *); 166 static void xnf_release_mblks(xnf_t *); 167 static struct xnf_buffer_desc *xnf_alloc_xmit_buffer(xnf_t *); 168 static struct xnf_buffer_desc *xnf_alloc_buffer(xnf_t *); 169 static struct xnf_buffer_desc *xnf_get_xmit_buffer(xnf_t *); 170 static struct xnf_buffer_desc *xnf_get_buffer(xnf_t *); 171 static void xnf_free_buffer(struct xnf_buffer_desc *); 172 static void xnf_free_xmit_buffer(struct xnf_buffer_desc *); 173 void xnf_send_driver_status(int, int); 174 static void rx_buffer_hang(xnf_t *, struct xnf_buffer_desc *); 175 static int xnf_clean_tx_ring(xnf_t *); 176 static void oe_state_change(dev_info_t *, ddi_eventcookie_t, 177 void *, void *); 178 179 /* 180 * XXPV dme: remove MC_IOCTL? 
181 */ 182 static mac_callbacks_t xnf_callbacks = { 183 MC_RESOURCES | MC_IOCTL | MC_GETCAPAB, 184 xnf_stat, 185 xnf_start, 186 xnf_stop, 187 xnf_set_promiscuous, 188 xnf_set_multicast, 189 xnf_set_mac_addr, 190 xnf_send, 191 xnf_resources, 192 xnf_ioctl, 193 xnf_getcapab 194 }; 195 196 #define GRANT_INVALID_REF 0 197 int xnf_recv_bufs_lowat = 4 * NET_RX_RING_SIZE; 198 int xnf_recv_bufs_hiwat = 8 * NET_RX_RING_SIZE; /* default max */ 199 200 /* DMA attributes for network ring buffer */ 201 static ddi_dma_attr_t ringbuf_dma_attr = { 202 DMA_ATTR_V0, /* version of this structure */ 203 0, /* lowest usable address */ 204 0xffffffffffffffffULL, /* highest usable address */ 205 0x7fffffff, /* maximum DMAable byte count */ 206 MMU_PAGESIZE, /* alignment in bytes */ 207 0x7ff, /* bitmap of burst sizes */ 208 1, /* minimum transfer */ 209 0xffffffffU, /* maximum transfer */ 210 0xffffffffffffffffULL, /* maximum segment length */ 211 1, /* maximum number of segments */ 212 1, /* granularity */ 213 0, /* flags (reserved) */ 214 }; 215 216 /* DMA attributes for transmit data */ 217 static ddi_dma_attr_t tx_buffer_dma_attr = { 218 DMA_ATTR_V0, /* version of this structure */ 219 0, /* lowest usable address */ 220 0xffffffffffffffffULL, /* highest usable address */ 221 0x7fffffff, /* maximum DMAable byte count */ 222 MMU_PAGESIZE, /* alignment in bytes */ 223 0x7ff, /* bitmap of burst sizes */ 224 1, /* minimum transfer */ 225 0xffffffffU, /* maximum transfer */ 226 0xffffffffffffffffULL, /* maximum segment length */ 227 1, /* maximum number of segments */ 228 1, /* granularity */ 229 0, /* flags (reserved) */ 230 }; 231 232 /* DMA attributes for a receive buffer */ 233 static ddi_dma_attr_t rx_buffer_dma_attr = { 234 DMA_ATTR_V0, /* version of this structure */ 235 0, /* lowest usable address */ 236 0xffffffffffffffffULL, /* highest usable address */ 237 0x7fffffff, /* maximum DMAable byte count */ 238 MMU_PAGESIZE, /* alignment in bytes */ 239 0x7ff, /* bitmap of burst sizes */ 240 1, /* minimum transfer */ 241 0xffffffffU, /* maximum transfer */ 242 0xffffffffffffffffULL, /* maximum segment length */ 243 1, /* maximum number of segments */ 244 1, /* granularity */ 245 0, /* flags (reserved) */ 246 }; 247 248 /* DMA access attributes for registers and descriptors */ 249 static ddi_device_acc_attr_t accattr = { 250 DDI_DEVICE_ATTR_V0, 251 DDI_STRUCTURE_LE_ACC, /* This is a little-endian device */ 252 DDI_STRICTORDER_ACC 253 }; 254 255 /* DMA access attributes for data: NOT to be byte swapped. */ 256 static ddi_device_acc_attr_t data_accattr = { 257 DDI_DEVICE_ATTR_V0, 258 DDI_NEVERSWAP_ACC, 259 DDI_STRICTORDER_ACC 260 }; 261 262 unsigned char xnf_broadcastaddr[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; 263 int xnf_diagnose = 0; /* Patchable global for diagnostic purposes */ 264 265 DDI_DEFINE_STREAM_OPS(xnf_dev_ops, nulldev, nulldev, xnf_attach, xnf_detach, 266 nodev, NULL, D_MP, NULL); 267 268 static struct modldrv xnf_modldrv = { 269 &mod_driverops, /* Type of module. 
This one is a driver */ 270 IDENT " %I%", /* short description */ 271 &xnf_dev_ops /* driver specific ops */ 272 }; 273 274 static struct modlinkage modlinkage = { 275 MODREV_1, &xnf_modldrv, NULL 276 }; 277 278 int 279 _init(void) 280 { 281 int r; 282 283 mac_init_ops(&xnf_dev_ops, "xnf"); 284 r = mod_install(&modlinkage); 285 if (r != DDI_SUCCESS) 286 mac_fini_ops(&xnf_dev_ops); 287 288 return (r); 289 } 290 291 int 292 _fini(void) 293 { 294 return (EBUSY); /* XXPV dme: should be removable */ 295 } 296 297 int 298 _info(struct modinfo *modinfop) 299 { 300 return (mod_info(&modlinkage, modinfop)); 301 } 302 303 /* 304 * Statistics. 305 */ 306 /* XXPV: most of these names need re-"nice"ing */ 307 static char *xnf_aux_statistics[] = { 308 "tx_cksum_deferred", 309 "rx_cksum_no_need", 310 "intr", 311 "xmit_pullup", 312 "xmit_pagebndry", 313 "xmit_attempt", 314 "rx_no_ringbuf", 315 "mac_rcv_error", 316 "runt", 317 }; 318 319 static int 320 xnf_kstat_aux_update(kstat_t *ksp, int flag) 321 { 322 xnf_t *xnfp; 323 kstat_named_t *knp; 324 325 if (flag != KSTAT_READ) 326 return (EACCES); 327 328 xnfp = ksp->ks_private; 329 knp = ksp->ks_data; 330 331 /* 332 * Assignment order should match that of the names in 333 * xnf_aux_statistics. 334 */ 335 (knp++)->value.ui64 = xnfp->stat_tx_cksum_deferred; 336 (knp++)->value.ui64 = xnfp->stat_rx_cksum_no_need; 337 338 (knp++)->value.ui64 = xnfp->stat_intr; 339 (knp++)->value.ui64 = xnfp->stat_xmit_pullup; 340 (knp++)->value.ui64 = xnfp->stat_xmit_pagebndry; 341 (knp++)->value.ui64 = xnfp->stat_xmit_attempt; 342 (knp++)->value.ui64 = xnfp->stat_rx_no_ringbuf; 343 (knp++)->value.ui64 = xnfp->stat_mac_rcv_error; 344 (knp++)->value.ui64 = xnfp->stat_runt; 345 346 return (0); 347 } 348 349 static boolean_t 350 xnf_kstat_init(xnf_t *xnfp) 351 { 352 int nstat = sizeof (xnf_aux_statistics) / 353 sizeof (xnf_aux_statistics[0]); 354 char **cp = xnf_aux_statistics; 355 kstat_named_t *knp; 356 357 /* 358 * Create and initialise kstats. 
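 *
 * They appear as a named kstat of class "net" under module "xnf" and
 * this device instance, so (for instance 0) the counters can be read
 * from a shell with something like:
 *   kstat -m xnf -i 0 -n aux_statistics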
359 */ 360 if ((xnfp->kstat_aux = kstat_create("xnf", 361 ddi_get_instance(xnfp->devinfo), 362 "aux_statistics", "net", KSTAT_TYPE_NAMED, 363 nstat, 0)) == NULL) 364 return (B_FALSE); 365 366 xnfp->kstat_aux->ks_private = xnfp; 367 xnfp->kstat_aux->ks_update = xnf_kstat_aux_update; 368 369 knp = xnfp->kstat_aux->ks_data; 370 while (nstat > 0) { 371 kstat_named_init(knp, *cp, KSTAT_DATA_UINT64); 372 373 knp++; 374 cp++; 375 nstat--; 376 } 377 378 kstat_install(xnfp->kstat_aux); 379 380 return (B_TRUE); 381 } 382 383 static int 384 xnf_setup_rings(xnf_t *xnfp) 385 { 386 int ix, err; 387 RING_IDX i; 388 struct xnf_buffer_desc *bdesc, *rbp; 389 struct xenbus_device *xsd; 390 domid_t oeid; 391 392 oeid = xvdi_get_oeid(xnfp->devinfo); 393 xsd = xvdi_get_xsd(xnfp->devinfo); 394 395 if (xnfp->tx_ring_ref != GRANT_INVALID_REF) 396 gnttab_end_foreign_access(xnfp->tx_ring_ref, 0, 0); 397 398 err = gnttab_grant_foreign_access(oeid, 399 xnf_btop(pa_to_ma(xnfp->tx_ring_phys_addr)), 0); 400 if (err <= 0) { 401 err = -err; 402 xenbus_dev_error(xsd, err, "granting access to tx ring page"); 403 goto out; 404 } 405 xnfp->tx_ring_ref = (grant_ref_t)err; 406 407 if (xnfp->rx_ring_ref != GRANT_INVALID_REF) 408 gnttab_end_foreign_access(xnfp->rx_ring_ref, 0, 0); 409 410 err = gnttab_grant_foreign_access(oeid, 411 xnf_btop(pa_to_ma(xnfp->rx_ring_phys_addr)), 0); 412 if (err <= 0) { 413 err = -err; 414 xenbus_dev_error(xsd, err, "granting access to rx ring page"); 415 goto out; 416 } 417 xnfp->rx_ring_ref = (grant_ref_t)err; 418 419 420 mutex_enter(&xnfp->intrlock); 421 422 /* 423 * Cleanup the TX ring. We just clean up any valid tx_pktinfo structs 424 * and reset the ring. Note that this can lose packets after a resume, 425 * but we expect to stagger on. 426 */ 427 mutex_enter(&xnfp->txlock); 428 429 for (i = 0; i < xnfp->n_xmits; i++) { 430 struct tx_pktinfo *txp = &xnfp->tx_pkt_info[i]; 431 432 txp->id = i + 1; 433 434 if (txp->grant_ref == GRANT_INVALID_REF) { 435 ASSERT(txp->mp == NULL); 436 ASSERT(txp->bdesc == NULL); 437 continue; 438 } 439 440 if (gnttab_query_foreign_access(txp->grant_ref) != 0) 441 panic("tx grant still in use by backend domain"); 442 443 freemsg(txp->mp); 444 txp->mp = NULL; 445 446 (void) ddi_dma_unbind_handle(txp->dma_handle); 447 448 if (txp->bdesc != NULL) { 449 xnf_free_xmit_buffer(txp->bdesc); 450 txp->bdesc = NULL; 451 } 452 453 (void) gnttab_end_foreign_access_ref(txp->grant_ref, 454 xnfp->tx_pages_readonly); 455 gnttab_release_grant_reference(&xnfp->gref_tx_head, 456 txp->grant_ref); 457 txp->grant_ref = GRANT_INVALID_REF; 458 } 459 460 xnfp->tx_pkt_id_list = 0; 461 xnfp->tx_ring.rsp_cons = 0; 462 xnfp->tx_ring.sring->req_prod = 0; 463 xnfp->tx_ring.sring->rsp_prod = 0; 464 xnfp->tx_ring.sring->rsp_event = 1; 465 466 mutex_exit(&xnfp->txlock); 467 468 /* 469 * Rebuild the RX ring. We have to rebuild the RX ring because some of 470 * our pages are currently flipped out so we can't just free the RX 471 * buffers. Reclaim any unprocessed recv buffers, they won't be 472 * useable anyway since the mfn's they refer to are no longer valid. 473 * Grant the backend domain access to each hung rx buffer. 
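 *
 * In outline: for each request still outstanding in the ring, re-offer
 * its buffer to the (possibly new) backend by re-issuing the transfer
 * grant against the backend's domain id, then reset the
 * producer/consumer indices and top the ring back up with fresh
 * buffers from the free list.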
474 */ 475 i = xnfp->rx_ring.rsp_cons; 476 while (i++ != xnfp->rx_ring.sring->req_prod) { 477 volatile netif_rx_request_t *rxrp; 478 479 rxrp = RING_GET_REQUEST(&xnfp->rx_ring, i); 480 ix = rxrp - RING_GET_REQUEST(&xnfp->rx_ring, 0); 481 rbp = xnfp->rxpkt_bufptr[ix]; 482 if (rbp != NULL) { 483 ASSERT(rbp->grant_ref != GRANT_INVALID_REF); 484 gnttab_grant_foreign_transfer_ref(rbp->grant_ref, 485 oeid); 486 rxrp->id = ix; 487 rxrp->gref = rbp->grant_ref; 488 } 489 } 490 /* 491 * Reset the ring pointers to initial state. 492 * Hang buffers for any empty ring slots. 493 */ 494 xnfp->rx_ring.rsp_cons = 0; 495 xnfp->rx_ring.sring->req_prod = 0; 496 xnfp->rx_ring.sring->rsp_prod = 0; 497 xnfp->rx_ring.sring->rsp_event = 1; 498 for (i = 0; i < NET_RX_RING_SIZE; i++) { 499 xnfp->rx_ring.req_prod_pvt = i; 500 if (xnfp->rxpkt_bufptr[i] != NULL) 501 continue; 502 if ((bdesc = xnf_get_buffer(xnfp)) == NULL) 503 break; 504 rx_buffer_hang(xnfp, bdesc); 505 } 506 xnfp->rx_ring.req_prod_pvt = i; 507 /* LINTED: constant in conditional context */ 508 RING_PUSH_REQUESTS(&xnfp->rx_ring); 509 510 mutex_exit(&xnfp->intrlock); 511 512 return (0); 513 514 out: 515 if (xnfp->tx_ring_ref != GRANT_INVALID_REF) 516 gnttab_end_foreign_access(xnfp->tx_ring_ref, 0, 0); 517 xnfp->tx_ring_ref = GRANT_INVALID_REF; 518 519 if (xnfp->rx_ring_ref != GRANT_INVALID_REF) 520 gnttab_end_foreign_access(xnfp->rx_ring_ref, 0, 0); 521 xnfp->rx_ring_ref = GRANT_INVALID_REF; 522 523 return (err); 524 } 525 526 /* 527 * Connect driver to back end, called to set up communication with 528 * back end driver both initially and on resume after restore/migrate. 529 */ 530 void 531 xnf_be_connect(xnf_t *xnfp) 532 { 533 char mac[ETHERADDRL * 3]; 534 const char *message; 535 xenbus_transaction_t xbt; 536 struct xenbus_device *xsd; 537 char *xsname; 538 int err, be_no_cksum_offload; 539 540 ASSERT(!xnfp->connected); 541 542 xsd = xvdi_get_xsd(xnfp->devinfo); 543 xsname = xvdi_get_xsname(xnfp->devinfo); 544 545 err = xenbus_scanf(XBT_NULL, xvdi_get_oename(xnfp->devinfo), "mac", 546 "%s", (char *)&mac[0]); 547 if (err != 0) { 548 /* 549 * bad: we're supposed to be set up with a proper mac 550 * addr. at this point 551 */ 552 cmn_err(CE_WARN, "%s%d: no mac address", 553 ddi_driver_name(xnfp->devinfo), 554 ddi_get_instance(xnfp->devinfo)); 555 return; 556 } 557 558 if (ether_aton(mac, xnfp->mac_addr) != ETHERADDRL) { 559 err = ENOENT; 560 xenbus_dev_error(xsd, ENOENT, "parsing %s/mac", xsname); 561 return; 562 } 563 564 err = xnf_setup_rings(xnfp); 565 if (err != 0) { 566 cmn_err(CE_WARN, "failed to set up tx/rx rings"); 567 xenbus_dev_error(xsd, err, "setting up ring"); 568 return; 569 } 570 571 err = xenbus_scanf(XBT_NULL, xvdi_get_oename(xnfp->devinfo), 572 "feature-no-csum-offload", "%d", &be_no_cksum_offload); 573 /* 574 * If we fail to read the store we assume that the key is 575 * absent, implying an older domain at the far end. Older 576 * domains always support checksum offload. 577 */ 578 if (err != 0) 579 be_no_cksum_offload = 0; 580 /* 581 * If the far end cannot do checksum offload or we do not wish 582 * to do it, disable it. 
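 *
 * The result is effectively the AND of the two ends' wishes: if the
 * backend published feature-no-csum-offload = 1, or xnf_cksum_offload
 * was cleared by the administrator, we fall back to software
 * checksumming and advertise feature-no-csum-offload = 1 ourselves in
 * the transaction below.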
583 */ 584 if ((be_no_cksum_offload == 1) || !xnfp->cksum_offload) 585 xnfp->cksum_offload = B_FALSE; 586 587 again: 588 err = xenbus_transaction_start(&xbt); 589 if (err != 0) { 590 xenbus_dev_error(xsd, EIO, "starting transaction"); 591 return; 592 } 593 594 err = xenbus_printf(xbt, xsname, "tx-ring-ref", "%u", 595 xnfp->tx_ring_ref); 596 if (err != 0) { 597 message = "writing tx ring-ref"; 598 goto abort_transaction; 599 } 600 601 err = xenbus_printf(xbt, xsname, "rx-ring-ref", "%u", 602 xnfp->rx_ring_ref); 603 if (err != 0) { 604 message = "writing rx ring-ref"; 605 goto abort_transaction; 606 } 607 608 err = xenbus_printf(xbt, xsname, "event-channel", "%u", xnfp->evtchn); 609 if (err != 0) { 610 message = "writing event-channel"; 611 goto abort_transaction; 612 } 613 614 err = xenbus_printf(xbt, xsname, "feature-rx-notify", "%d", 1); 615 if (err != 0) { 616 message = "writing feature-rx-notify"; 617 goto abort_transaction; 618 } 619 620 if (!xnfp->tx_pages_readonly) { 621 err = xenbus_printf(xbt, xsname, "feature-tx-writable", 622 "%d", 1); 623 if (err != 0) { 624 message = "writing feature-tx-writable"; 625 goto abort_transaction; 626 } 627 } 628 629 err = xenbus_printf(xbt, xsname, "feature-no-csum-offload", "%d", 630 xnfp->cksum_offload ? 0 : 1); 631 if (err != 0) { 632 message = "writing feature-no-csum-offload"; 633 goto abort_transaction; 634 } 635 636 err = xenbus_printf(xbt, xsname, "state", "%d", XenbusStateConnected); 637 if (err != 0) { 638 message = "writing frontend XenbusStateConnected"; 639 goto abort_transaction; 640 } 641 642 err = xenbus_transaction_end(xbt, 0); 643 if (err != 0) { 644 if (err == EAGAIN) 645 goto again; 646 xenbus_dev_error(xsd, err, "completing transaction"); 647 } 648 649 return; 650 651 abort_transaction: 652 (void) xenbus_transaction_end(xbt, 1); 653 xenbus_dev_error(xsd, err, "%s", message); 654 } 655 656 /* 657 * attach(9E) -- Attach a device to the system 658 * 659 * Called once for each board successfully probed. 660 */ 661 static int 662 xnf_attach(dev_info_t *devinfo, ddi_attach_cmd_t cmd) 663 { 664 mac_register_t *macp; 665 xnf_t *xnfp; 666 int err; 667 668 #ifdef XNF_DEBUG 669 if (xnfdebug & XNF_DEBUG_DDI) 670 printf("xnf%d: attach(0x%p)\n", ddi_get_instance(devinfo), 671 (void *)devinfo); 672 #endif 673 674 switch (cmd) { 675 case DDI_RESUME: 676 xnfp = ddi_get_driver_private(devinfo); 677 678 (void) xvdi_resume(devinfo); 679 (void) xvdi_alloc_evtchn(devinfo); 680 (void) ddi_add_intr(devinfo, 0, NULL, NULL, xnf_intr, 681 (caddr_t)xnfp); 682 xnfp->evtchn = xvdi_get_evtchn(devinfo); 683 xnf_be_connect(xnfp); 684 /* 685 * Our MAC address didn't necessarily change, but 686 * given that we may be resuming this OS instance 687 * on a different machine (or on the same one and got a 688 * different MAC address because we didn't specify one of 689 * our own), it's useful to claim that 690 * it changed in order that IP send out a 691 * gratuitous ARP. 
692 */ 693 mac_unicst_update(xnfp->mh, xnfp->mac_addr); 694 return (DDI_SUCCESS); 695 696 case DDI_ATTACH: 697 break; 698 699 default: 700 return (DDI_FAILURE); 701 } 702 703 /* 704 * Allocate gld_mac_info_t and xnf_instance structures 705 */ 706 macp = mac_alloc(MAC_VERSION); 707 if (macp == NULL) 708 return (DDI_FAILURE); 709 xnfp = kmem_zalloc(sizeof (*xnfp), KM_SLEEP); 710 711 macp->m_dip = devinfo; 712 macp->m_driver = xnfp; 713 xnfp->devinfo = devinfo; 714 715 macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER; 716 macp->m_src_addr = xnfp->mac_addr; 717 macp->m_callbacks = &xnf_callbacks; 718 macp->m_min_sdu = 0; 719 macp->m_max_sdu = XNF_MAXPKT; 720 721 xnfp->running = B_FALSE; 722 xnfp->connected = B_FALSE; 723 xnfp->cksum_offload = xnf_cksum_offload; 724 xnfp->tx_pages_readonly = xnf_tx_pages_readonly; 725 726 /* 727 * Get the iblock cookie with which to initialize the mutexes. 728 */ 729 if (ddi_get_iblock_cookie(devinfo, 0, &xnfp->icookie) 730 != DDI_SUCCESS) 731 goto failure; 732 /* 733 * Driver locking strategy: the txlock protects all paths 734 * through the driver, except the interrupt thread. 735 * If the interrupt thread needs to do something which could 736 * affect the operation of any other part of the driver, 737 * it needs to acquire the txlock mutex. 738 */ 739 mutex_init(&xnfp->tx_buf_mutex, 740 NULL, MUTEX_DRIVER, xnfp->icookie); 741 mutex_init(&xnfp->rx_buf_mutex, 742 NULL, MUTEX_DRIVER, xnfp->icookie); 743 mutex_init(&xnfp->txlock, 744 NULL, MUTEX_DRIVER, xnfp->icookie); 745 mutex_init(&xnfp->intrlock, 746 NULL, MUTEX_DRIVER, xnfp->icookie); 747 cv_init(&xnfp->cv, NULL, CV_DEFAULT, NULL); 748 749 if (gnttab_alloc_grant_references(NET_TX_RING_SIZE, 750 &xnfp->gref_tx_head) < 0) { 751 cmn_err(CE_WARN, "xnf%d: can't alloc tx grant refs", 752 ddi_get_instance(xnfp->devinfo)); 753 goto late_failure; 754 } 755 if (gnttab_alloc_grant_references(NET_RX_RING_SIZE, 756 &xnfp->gref_rx_head) < 0) { 757 cmn_err(CE_WARN, "xnf%d: can't alloc rx grant refs", 758 ddi_get_instance(xnfp->devinfo)); 759 goto late_failure; 760 } 761 if (xnf_alloc_dma_resources(xnfp) == DDI_FAILURE) { 762 cmn_err(CE_WARN, "xnf%d: failed to allocate and initialize " 763 "driver data structures", ddi_get_instance(xnfp->devinfo)); 764 goto late_failure; 765 } 766 767 xnfp->rx_ring.sring->rsp_event = xnfp->tx_ring.sring->rsp_event = 1; 768 769 xnfp->tx_ring_ref = GRANT_INVALID_REF; 770 xnfp->rx_ring_ref = GRANT_INVALID_REF; 771 772 /* set driver private pointer now */ 773 ddi_set_driver_private(devinfo, xnfp); 774 775 if (xvdi_add_event_handler(devinfo, XS_OE_STATE, oe_state_change) 776 != DDI_SUCCESS) 777 goto late_failure; 778 779 if (!xnf_kstat_init(xnfp)) 780 goto very_late_failure; 781 782 /* 783 * Allocate an event channel, add the interrupt handler and 784 * bind it to the event channel. 
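 *
 * The channel number returned by xvdi_get_evtchn() is the value that
 * xnf_be_connect() publishes to the backend in the "event-channel"
 * xenstore property; the same sequence is repeated on DDI_RESUME above.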
785 */ 786 (void) xvdi_alloc_evtchn(devinfo); 787 (void) ddi_add_intr(devinfo, 0, NULL, NULL, xnf_intr, (caddr_t)xnfp); 788 xnfp->evtchn = xvdi_get_evtchn(devinfo); 789 790 /* 791 * connect to the backend 792 */ 793 xnf_be_connect(xnfp); 794 795 err = mac_register(macp, &xnfp->mh); 796 mac_free(macp); 797 macp = NULL; 798 if (err != 0) 799 goto very_very_late_failure; 800 801 return (DDI_SUCCESS); 802 803 very_very_late_failure: 804 kstat_delete(xnfp->kstat_aux); 805 806 very_late_failure: 807 xvdi_remove_event_handler(devinfo, XS_OE_STATE); 808 ddi_remove_intr(devinfo, 0, xnfp->icookie); 809 xnfp->evtchn = INVALID_EVTCHN; 810 811 late_failure: 812 xnf_release_dma_resources(xnfp); 813 cv_destroy(&xnfp->cv); 814 mutex_destroy(&xnfp->rx_buf_mutex); 815 mutex_destroy(&xnfp->txlock); 816 mutex_destroy(&xnfp->intrlock); 817 818 failure: 819 kmem_free(xnfp, sizeof (*xnfp)); 820 if (macp != NULL) 821 mac_free(macp); 822 823 return (DDI_FAILURE); 824 } 825 826 /* detach(9E) -- Detach a device from the system */ 827 static int 828 xnf_detach(dev_info_t *devinfo, ddi_detach_cmd_t cmd) 829 { 830 xnf_t *xnfp; /* Our private device info */ 831 int i; 832 833 #ifdef XNF_DEBUG 834 if (xnfdebug & XNF_DEBUG_DDI) 835 printf("xnf_detach(0x%p)\n", (void *)devinfo); 836 #endif 837 838 xnfp = ddi_get_driver_private(devinfo); 839 840 switch (cmd) { 841 case DDI_SUSPEND: 842 ddi_remove_intr(devinfo, 0, xnfp->icookie); 843 844 xvdi_suspend(devinfo); 845 846 mutex_enter(&xnfp->intrlock); 847 mutex_enter(&xnfp->txlock); 848 849 xnfp->evtchn = INVALID_EVTCHN; 850 xnfp->connected = B_FALSE; 851 mutex_exit(&xnfp->txlock); 852 mutex_exit(&xnfp->intrlock); 853 return (DDI_SUCCESS); 854 855 case DDI_DETACH: 856 break; 857 858 default: 859 return (DDI_FAILURE); 860 } 861 862 if (xnfp->connected) 863 return (DDI_FAILURE); 864 865 /* Wait for receive buffers to be returned; give up after 5 seconds */ 866 i = 50; 867 868 mutex_enter(&xnfp->rx_buf_mutex); 869 while (xnfp->rx_bufs_outstanding > 0) { 870 mutex_exit(&xnfp->rx_buf_mutex); 871 delay(drv_usectohz(100000)); 872 if (--i == 0) { 873 cmn_err(CE_WARN, 874 "xnf%d: never reclaimed all the " 875 "receive buffers. Still have %d " 876 "buffers outstanding.", 877 ddi_get_instance(xnfp->devinfo), 878 xnfp->rx_bufs_outstanding); 879 return (DDI_FAILURE); 880 } 881 mutex_enter(&xnfp->rx_buf_mutex); 882 } 883 mutex_exit(&xnfp->rx_buf_mutex); 884 885 kstat_delete(xnfp->kstat_aux); 886 887 if (mac_unregister(xnfp->mh) != 0) 888 return (DDI_FAILURE); 889 890 /* Stop the receiver */ 891 xnf_stop(xnfp); 892 893 xvdi_remove_event_handler(devinfo, XS_OE_STATE); 894 895 /* Remove the interrupt */ 896 ddi_remove_intr(devinfo, 0, xnfp->icookie); 897 898 /* Release any pending xmit mblks */ 899 xnf_release_mblks(xnfp); 900 901 /* Release all DMA resources */ 902 xnf_release_dma_resources(xnfp); 903 904 cv_destroy(&xnfp->cv); 905 mutex_destroy(&xnfp->rx_buf_mutex); 906 mutex_destroy(&xnfp->txlock); 907 mutex_destroy(&xnfp->intrlock); 908 909 kmem_free(xnfp, sizeof (*xnfp)); 910 911 return (DDI_SUCCESS); 912 } 913 914 /* 915 * xnf_set_mac_addr() -- set the physical network address on the board. 
916 */ 917 /*ARGSUSED*/ 918 static int 919 xnf_set_mac_addr(void *arg, const uint8_t *macaddr) 920 { 921 xnf_t *xnfp = arg; 922 923 #ifdef XNF_DEBUG 924 if (xnfdebug & XNF_DEBUG_TRACE) 925 printf("xnf%d: set_mac_addr(0x%p): " 926 "%02x:%02x:%02x:%02x:%02x:%02x\n", 927 ddi_get_instance(xnfp->devinfo), 928 (void *)xnfp, macaddr[0], macaddr[1], macaddr[2], 929 macaddr[3], macaddr[4], macaddr[5]); 930 #endif 931 /* 932 * We can't set our macaddr. 933 * 934 * XXPV dme: Why not? 935 */ 936 return (ENOTSUP); 937 } 938 939 /* 940 * xnf_set_multicast() -- set (enable) or disable a multicast address. 941 * 942 * Program the hardware to enable/disable the multicast address 943 * in "mcast". Enable if "add" is true, disable if false. 944 */ 945 /*ARGSUSED*/ 946 static int 947 xnf_set_multicast(void *arg, boolean_t add, const uint8_t *mca) 948 { 949 xnf_t *xnfp = arg; 950 951 #ifdef XNF_DEBUG 952 if (xnfdebug & XNF_DEBUG_TRACE) 953 printf("xnf%d set_multicast(0x%p): " 954 "%02x:%02x:%02x:%02x:%02x:%02x\n", 955 ddi_get_instance(xnfp->devinfo), 956 (void *)xnfp, mca[0], mca[1], mca[2], 957 mca[3], mca[4], mca[5]); 958 #endif 959 960 /* 961 * XXPV dme: Ideally we'd relay the address to the backend for 962 * enabling. The protocol doesn't support that (interesting 963 * extension), so we simply succeed and hope that the relevant 964 * packets are going to arrive. 965 * 966 * If protocol support is added for enable/disable then we'll 967 * need to keep a list of those in use and re-add on resume. 968 */ 969 return (0); 970 } 971 972 /* 973 * xnf_set_promiscuous() -- set or reset promiscuous mode on the board 974 * 975 * Program the hardware to enable/disable promiscuous mode. 976 */ 977 /*ARGSUSED*/ 978 static int 979 xnf_set_promiscuous(void *arg, boolean_t on) 980 { 981 xnf_t *xnfp = arg; 982 983 #ifdef XNF_DEBUG 984 if (xnfdebug & XNF_DEBUG_TRACE) 985 printf("xnf%d set_promiscuous(0x%p, %x)\n", 986 ddi_get_instance(xnfp->devinfo), 987 (void *)xnfp, on); 988 #endif 989 /* 990 * We can't really do this, but we pretend that we can in 991 * order that snoop will work. 992 */ 993 return (0); 994 } 995 996 /* 997 * Clean buffers that we have responses for from the transmit ring. 
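 *
 * Returns the number of free transmit ring slots, computed from the
 * ring indices as
 *   NET_TX_RING_SIZE - (sring->req_prod - rsp_cons)
 * i.e. the ring size minus the number of requests posted but not yet
 * acknowledged by the backend.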
998 */
999 static int
1000 xnf_clean_tx_ring(xnf_t *xnfp)
1001 {
1002 RING_IDX next_resp, i;
1003 struct tx_pktinfo *reap;
1004 int id;
1005 grant_ref_t ref;
1006
1007 ASSERT(MUTEX_HELD(&xnfp->txlock));
1008
1009 do {
1010 /*
1011 * index of next transmission ack
1012 */
1013 next_resp = xnfp->tx_ring.sring->rsp_prod;
1014 membar_consumer();
1015 /*
1016 * Clean tx packets from ring that we have responses for
1017 */
1018 for (i = xnfp->tx_ring.rsp_cons; i != next_resp; i++) {
1019 id = RING_GET_RESPONSE(&xnfp->tx_ring, i)->id;
1020 reap = &xnfp->tx_pkt_info[id];
1021 ref = reap->grant_ref;
1022 /*
1023 * Return id to free list
1024 */
1025 reap->id = xnfp->tx_pkt_id_list;
1026 xnfp->tx_pkt_id_list = id;
1027 if (gnttab_query_foreign_access(ref) != 0)
1028 panic("tx grant still in use "
1029 "by backend domain");
1030 (void) ddi_dma_unbind_handle(reap->dma_handle);
1031 (void) gnttab_end_foreign_access_ref(ref,
1032 xnfp->tx_pages_readonly);
1033 gnttab_release_grant_reference(&xnfp->gref_tx_head,
1034 ref);
1035 freemsg(reap->mp);
1036 reap->mp = NULL;
1037 reap->grant_ref = GRANT_INVALID_REF;
1038 if (reap->bdesc != NULL)
1039 xnf_free_xmit_buffer(reap->bdesc);
1040 reap->bdesc = NULL;
1041 }
1042 xnfp->tx_ring.rsp_cons = next_resp;
1043 membar_enter();
1044 } while (next_resp != xnfp->tx_ring.sring->rsp_prod);
1045 return (NET_TX_RING_SIZE - (xnfp->tx_ring.sring->req_prod - next_resp));
1046 }
1047
1048 /*
1049 * If we need to pull up data from either a packet that crosses a page
1050 * boundary or one consisting of multiple mblks, do it here. We allocate
1051 * a page-aligned buffer and copy the data into it. The descriptor for the
1052 * new buffer (which is also allocated here) is returned.
1053 */
1054 static struct xnf_buffer_desc *
1055 xnf_pullupmsg(xnf_t *xnfp, mblk_t *mp)
1056 {
1057 struct xnf_buffer_desc *bdesc;
1058 mblk_t *mptr;
1059 caddr_t bp;
1060 int len;
1061
1062 /*
1063 * get an xmit buffer from the xmit buffer pool
1064 */
1065 mutex_enter(&xnfp->rx_buf_mutex);
1066 bdesc = xnf_get_xmit_buffer(xnfp);
1067 mutex_exit(&xnfp->rx_buf_mutex);
1068 if (bdesc == NULL)
1069 return (bdesc);
1070 /*
1071 * Copy the data into the buffer
1072 */
1073 xnfp->stat_xmit_pullup++;
1074 bp = bdesc->buf;
1075 for (mptr = mp; mptr != NULL; mptr = mptr->b_cont) {
1076 len = mptr->b_wptr - mptr->b_rptr;
1077 bcopy(mptr->b_rptr, bp, len);
1078 bp += len;
1079 }
1080 return (bdesc);
1081 }
1082
1083 /*
1084 * xnf_send_one() -- send a packet
1085 *
1086 * Called when a packet is ready to be transmitted. A pointer to an
1087 * M_DATA message that contains the packet is passed to this routine.
1088 * At least the complete LLC header is contained in the message's
1089 * first message block, and the remainder of the packet is contained
1090 * within additional M_DATA message blocks linked to the first
1091 * message block.
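 *
 * Returns B_TRUE when the packet has been queued on the transmit ring
 * (the caller, xnf_send(), batches the event channel notification) and
 * B_FALSE when it should be retried later, e.g. because ring slots or
 * DMA resources were temporarily exhausted. A packet that crosses a
 * page boundary, or that arrives as more than xnf_max_tx_frags message
 * blocks, is first copied into a single page-aligned transmit buffer
 * by xnf_pullupmsg().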
1092 * 1093 */ 1094 static boolean_t 1095 xnf_send_one(xnf_t *xnfp, mblk_t *mp) 1096 { 1097 struct xnf_buffer_desc *xmitbuf; 1098 struct tx_pktinfo *txp_info; 1099 mblk_t *mptr; 1100 ddi_dma_cookie_t dma_cookie; 1101 RING_IDX slot, txs_out; 1102 int length = 0, i, pktlen = 0, rc, tx_id; 1103 int tx_ring_freespace, page_oops; 1104 uint_t ncookies; 1105 volatile netif_tx_request_t *txrp; 1106 caddr_t bufaddr; 1107 grant_ref_t ref; 1108 unsigned long mfn; 1109 uint32_t pflags; 1110 domid_t oeid; 1111 1112 #ifdef XNF_DEBUG 1113 if (xnfdebug & XNF_DEBUG_SEND) 1114 printf("xnf%d send(0x%p, 0x%p)\n", 1115 ddi_get_instance(xnfp->devinfo), 1116 (void *)xnfp, (void *)mp); 1117 #endif 1118 1119 ASSERT(mp != NULL); 1120 ASSERT(mp->b_next == NULL); 1121 ASSERT(MUTEX_HELD(&xnfp->txlock)); 1122 1123 tx_ring_freespace = xnf_clean_tx_ring(xnfp); 1124 ASSERT(tx_ring_freespace >= 0); 1125 1126 oeid = xvdi_get_oeid(xnfp->devinfo); 1127 xnfp->stat_xmit_attempt++; 1128 /* 1129 * If there are no xmit ring slots available, return. 1130 */ 1131 if (tx_ring_freespace == 0) { 1132 xnfp->stat_xmit_defer++; 1133 return (B_FALSE); /* Send should be retried */ 1134 } 1135 1136 slot = xnfp->tx_ring.sring->req_prod; 1137 /* Count the number of mblks in message and compute packet size */ 1138 for (i = 0, mptr = mp; mptr != NULL; mptr = mptr->b_cont, i++) 1139 pktlen += (mptr->b_wptr - mptr->b_rptr); 1140 1141 /* Make sure packet isn't too large */ 1142 if (pktlen > XNF_FRAMESIZE) { 1143 cmn_err(CE_WARN, "xnf%d: large packet %d bytes", 1144 ddi_get_instance(xnfp->devinfo), pktlen); 1145 freemsg(mp); 1146 return (B_FALSE); 1147 } 1148 1149 /* 1150 * Test if we cross a page boundary with our buffer 1151 */ 1152 page_oops = (i == 1) && 1153 (xnf_btop((size_t)mp->b_rptr) != 1154 xnf_btop((size_t)(mp->b_rptr + pktlen))); 1155 /* 1156 * XXPV - unfortunately, the Xen virtual net device currently 1157 * doesn't support multiple packet frags, so this will always 1158 * end up doing the pullup if we got more than one packet. 1159 */ 1160 if (i > xnf_max_tx_frags || page_oops) { 1161 if (page_oops) 1162 xnfp->stat_xmit_pagebndry++; 1163 if ((xmitbuf = xnf_pullupmsg(xnfp, mp)) == NULL) { 1164 /* could not allocate resources? 
*/ 1165 #ifdef XNF_DEBUG 1166 cmn_err(CE_WARN, "xnf%d: pullupmsg failed", 1167 ddi_get_instance(xnfp->devinfo)); 1168 #endif 1169 xnfp->stat_xmit_defer++; 1170 return (B_FALSE); /* Retry send */ 1171 } 1172 bufaddr = xmitbuf->buf; 1173 } else { 1174 xmitbuf = NULL; 1175 bufaddr = (caddr_t)mp->b_rptr; 1176 } 1177 1178 /* set up data descriptor */ 1179 length = pktlen; 1180 1181 /* 1182 * Get packet id from free list 1183 */ 1184 tx_id = xnfp->tx_pkt_id_list; 1185 ASSERT(tx_id < NET_TX_RING_SIZE); 1186 txp_info = &xnfp->tx_pkt_info[tx_id]; 1187 xnfp->tx_pkt_id_list = txp_info->id; 1188 txp_info->id = tx_id; 1189 1190 /* Prepare for DMA mapping of tx buffer(s) */ 1191 rc = ddi_dma_addr_bind_handle(txp_info->dma_handle, 1192 NULL, bufaddr, length, DDI_DMA_WRITE | DDI_DMA_STREAMING, 1193 DDI_DMA_DONTWAIT, 0, &dma_cookie, &ncookies); 1194 if (rc != DDI_DMA_MAPPED) { 1195 ASSERT(rc != DDI_DMA_INUSE); 1196 ASSERT(rc != DDI_DMA_PARTIAL_MAP); 1197 /* 1198 * Return id to free list 1199 */ 1200 txp_info->id = xnfp->tx_pkt_id_list; 1201 xnfp->tx_pkt_id_list = tx_id; 1202 if (rc == DDI_DMA_NORESOURCES) { 1203 xnfp->stat_xmit_defer++; 1204 return (B_FALSE); /* Retry later */ 1205 } 1206 #ifdef XNF_DEBUG 1207 cmn_err(CE_WARN, "xnf%d: bind_handle failed (%x)", 1208 ddi_get_instance(xnfp->devinfo), rc); 1209 #endif 1210 return (B_FALSE); 1211 } 1212 1213 ASSERT(ncookies == 1); 1214 ref = gnttab_claim_grant_reference(&xnfp->gref_tx_head); 1215 ASSERT((signed short)ref >= 0); 1216 mfn = xnf_btop(pa_to_ma((paddr_t)dma_cookie.dmac_laddress)); 1217 gnttab_grant_foreign_access_ref(ref, oeid, mfn, 1218 xnfp->tx_pages_readonly); 1219 txp_info->grant_ref = ref; 1220 txrp = RING_GET_REQUEST(&xnfp->tx_ring, slot); 1221 txrp->gref = ref; 1222 txrp->size = dma_cookie.dmac_size; 1223 txrp->offset = (uintptr_t)bufaddr & PAGEOFFSET; 1224 txrp->id = tx_id; 1225 txrp->flags = 0; 1226 hcksum_retrieve(mp, NULL, NULL, NULL, NULL, NULL, NULL, &pflags); 1227 if (pflags != 0) { 1228 ASSERT(xnfp->cksum_offload); 1229 /* 1230 * If the local protocol stack requests checksum 1231 * offload we set the 'checksum blank' flag, 1232 * indicating to the peer that we need the checksum 1233 * calculated for us. 1234 * 1235 * We _don't_ set the validated flag, because we haven't 1236 * validated that the data and the checksum match. 1237 */ 1238 txrp->flags |= NETTXF_csum_blank; 1239 xnfp->stat_tx_cksum_deferred++; 1240 } 1241 membar_producer(); 1242 xnfp->tx_ring.sring->req_prod = slot + 1; 1243 1244 txp_info->mp = mp; 1245 txp_info->bdesc = xmitbuf; 1246 1247 txs_out = xnfp->tx_ring.sring->req_prod - xnfp->tx_ring.sring->rsp_prod; 1248 if (xnfp->tx_ring.sring->req_prod - xnfp->tx_ring.rsp_cons < 1249 XNF_TX_FREE_THRESH) { 1250 /* 1251 * The ring is getting full; Set up this packet 1252 * to cause an interrupt. 1253 */ 1254 xnfp->tx_ring.sring->rsp_event = 1255 xnfp->tx_ring.sring->rsp_prod + txs_out; 1256 } 1257 1258 xnfp->stat_opackets++; 1259 xnfp->stat_obytes += pktlen; 1260 1261 return (B_TRUE); /* successful transmit attempt */ 1262 } 1263 1264 mblk_t * 1265 xnf_send(void *arg, mblk_t *mp) 1266 { 1267 xnf_t *xnfp = arg; 1268 mblk_t *next; 1269 boolean_t sent_something = B_FALSE; 1270 1271 mutex_enter(&xnfp->txlock); 1272 1273 /* 1274 * Transmission attempts should be impossible without having 1275 * previously called xnf_start(). 
1276 */ 1277 ASSERT(xnfp->running); 1278 1279 /* 1280 * Wait for getting connected to the backend 1281 */ 1282 while (!xnfp->connected) { 1283 cv_wait(&xnfp->cv, &xnfp->txlock); 1284 } 1285 1286 while (mp != NULL) { 1287 next = mp->b_next; 1288 mp->b_next = NULL; 1289 1290 if (!xnf_send_one(xnfp, mp)) { 1291 mp->b_next = next; 1292 break; 1293 } 1294 1295 mp = next; 1296 sent_something = B_TRUE; 1297 } 1298 1299 if (sent_something) 1300 ec_notify_via_evtchn(xnfp->evtchn); 1301 1302 mutex_exit(&xnfp->txlock); 1303 1304 return (mp); 1305 } 1306 1307 /* 1308 * xnf_intr() -- ring interrupt service routine 1309 */ 1310 static uint_t 1311 xnf_intr(caddr_t arg) 1312 { 1313 xnf_t *xnfp = (xnf_t *)arg; 1314 int tx_ring_space; 1315 1316 mutex_enter(&xnfp->intrlock); 1317 1318 /* 1319 * If not connected to the peer or not started by the upper 1320 * layers we cannot usefully handle interrupts. 1321 */ 1322 if (!(xnfp->connected && xnfp->running)) { 1323 mutex_exit(&xnfp->intrlock); 1324 return (DDI_INTR_UNCLAIMED); 1325 } 1326 1327 #ifdef XNF_DEBUG 1328 if (xnfdebug & XNF_DEBUG_INT) 1329 printf("xnf%d intr(0x%p)\n", 1330 ddi_get_instance(xnfp->devinfo), (void *)xnfp); 1331 #endif 1332 if (RING_HAS_UNCONSUMED_RESPONSES(&xnfp->rx_ring)) { 1333 mblk_t *mp; 1334 1335 if ((mp = xnf_process_recv(xnfp)) != NULL) 1336 mac_rx(xnfp->mh, xnfp->rx_handle, mp); 1337 } 1338 1339 /* 1340 * Is tx ring nearly full? 1341 */ 1342 #define inuse(r) ((r).sring->req_prod - (r).rsp_cons) 1343 1344 if ((NET_TX_RING_SIZE - inuse(xnfp->tx_ring)) < XNF_TX_FREE_THRESH) { 1345 /* 1346 * Yes, clean it and try to start any blocked xmit 1347 * streams. 1348 */ 1349 mutex_enter(&xnfp->txlock); 1350 tx_ring_space = xnf_clean_tx_ring(xnfp); 1351 mutex_exit(&xnfp->txlock); 1352 if (tx_ring_space > XNF_TX_FREE_THRESH) { 1353 mutex_exit(&xnfp->intrlock); 1354 mac_tx_update(xnfp->mh); 1355 mutex_enter(&xnfp->intrlock); 1356 } else { 1357 /* 1358 * Schedule another tx interrupt when we have 1359 * sent enough packets to cross the threshold. 1360 */ 1361 xnfp->tx_ring.sring->rsp_event = 1362 xnfp->tx_ring.sring->rsp_prod + 1363 XNF_TX_FREE_THRESH - tx_ring_space + 1; 1364 } 1365 } 1366 #undef inuse 1367 1368 xnfp->stat_intr++; 1369 mutex_exit(&xnfp->intrlock); 1370 return (DDI_INTR_CLAIMED); /* indicate that the interrupt was for us */ 1371 } 1372 1373 /* 1374 * xnf_start() -- start the board receiving and enable interrupts. 1375 */ 1376 static int 1377 xnf_start(void *arg) 1378 { 1379 xnf_t *xnfp = arg; 1380 1381 #ifdef XNF_DEBUG 1382 if (xnfdebug & XNF_DEBUG_TRACE) 1383 printf("xnf%d start(0x%p)\n", 1384 ddi_get_instance(xnfp->devinfo), (void *)xnfp); 1385 #endif 1386 1387 mutex_enter(&xnfp->intrlock); 1388 mutex_enter(&xnfp->txlock); 1389 1390 /* Accept packets from above. 
*/ 1391 xnfp->running = B_TRUE; 1392 1393 mutex_exit(&xnfp->txlock); 1394 mutex_exit(&xnfp->intrlock); 1395 1396 return (0); 1397 } 1398 1399 /* xnf_stop() - disable hardware */ 1400 static void 1401 xnf_stop(void *arg) 1402 { 1403 xnf_t *xnfp = arg; 1404 1405 #ifdef XNF_DEBUG 1406 if (xnfdebug & XNF_DEBUG_TRACE) 1407 printf("xnf%d stop(0x%p)\n", 1408 ddi_get_instance(xnfp->devinfo), (void *)xnfp); 1409 #endif 1410 1411 mutex_enter(&xnfp->intrlock); 1412 mutex_enter(&xnfp->txlock); 1413 1414 xnfp->running = B_FALSE; 1415 1416 mutex_exit(&xnfp->txlock); 1417 mutex_exit(&xnfp->intrlock); 1418 } 1419 1420 /* 1421 * Driver private functions follow 1422 */ 1423 1424 /* 1425 * Hang buffer on rx ring 1426 */ 1427 static void 1428 rx_buffer_hang(xnf_t *xnfp, struct xnf_buffer_desc *bdesc) 1429 { 1430 volatile netif_rx_request_t *reqp; 1431 RING_IDX hang_ix; 1432 grant_ref_t ref; 1433 domid_t oeid; 1434 1435 oeid = xvdi_get_oeid(xnfp->devinfo); 1436 1437 ASSERT(MUTEX_HELD(&xnfp->intrlock)); 1438 reqp = RING_GET_REQUEST(&xnfp->rx_ring, xnfp->rx_ring.req_prod_pvt); 1439 hang_ix = (RING_IDX) (reqp - RING_GET_REQUEST(&xnfp->rx_ring, 0)); 1440 ASSERT(xnfp->rxpkt_bufptr[hang_ix] == NULL); 1441 if (bdesc->grant_ref == GRANT_INVALID_REF) { 1442 ref = gnttab_claim_grant_reference(&xnfp->gref_rx_head); 1443 ASSERT((signed short)ref >= 0); 1444 bdesc->grant_ref = ref; 1445 gnttab_grant_foreign_transfer_ref(ref, oeid); 1446 } 1447 reqp->id = hang_ix; 1448 reqp->gref = bdesc->grant_ref; 1449 bdesc->id = hang_ix; 1450 xnfp->rxpkt_bufptr[hang_ix] = bdesc; 1451 membar_producer(); 1452 xnfp->rx_ring.req_prod_pvt++; 1453 } 1454 1455 1456 /* Process all queued received packets */ 1457 static mblk_t * 1458 xnf_process_recv(xnf_t *xnfp) 1459 { 1460 volatile netif_rx_response_t *rxpkt; 1461 mblk_t *mp, *head, *tail; 1462 struct xnf_buffer_desc *bdesc; 1463 extern mblk_t *desballoc(unsigned char *, size_t, uint_t, frtn_t *); 1464 boolean_t hwcsum = B_FALSE, notify, work_to_do; 1465 size_t len; 1466 pfn_t pfn; 1467 long cnt; 1468 1469 head = tail = NULL; 1470 loop: 1471 while (RING_HAS_UNCONSUMED_RESPONSES(&xnfp->rx_ring)) { 1472 1473 rxpkt = RING_GET_RESPONSE(&xnfp->rx_ring, 1474 xnfp->rx_ring.rsp_cons); 1475 1476 /* 1477 * Take buffer off of receive ring 1478 */ 1479 hwcsum = B_FALSE; 1480 bdesc = xnfp->rxpkt_bufptr[rxpkt->id]; 1481 xnfp->rxpkt_bufptr[rxpkt->id] = NULL; 1482 ASSERT(bdesc->id == rxpkt->id); 1483 if (rxpkt->status <= 0) { 1484 mp = NULL; 1485 xnfp->stat_errrcv++; 1486 if (rxpkt->status == 0) 1487 xnfp->stat_runt++; 1488 if (rxpkt->status == NETIF_RSP_ERROR) 1489 xnfp->stat_mac_rcv_error++; 1490 if (rxpkt->status == NETIF_RSP_DROPPED) 1491 xnfp->stat_norcvbuf++; 1492 /* 1493 * re-hang the buffer 1494 */ 1495 rx_buffer_hang(xnfp, bdesc); 1496 } else { 1497 grant_ref_t ref = bdesc->grant_ref; 1498 struct xnf_buffer_desc *new_bdesc; 1499 unsigned long off = rxpkt->offset; 1500 unsigned long mfn; 1501 1502 len = rxpkt->status; 1503 ASSERT(off + len <= PAGEOFFSET); 1504 if (ref == GRANT_INVALID_REF) { 1505 mp = NULL; 1506 new_bdesc = bdesc; 1507 cmn_err(CE_WARN, "Bad rx grant reference %d " 1508 "from dom %d", ref, 1509 xvdi_get_oeid(xnfp->devinfo)); 1510 goto luckless; 1511 } 1512 bdesc->grant_ref = GRANT_INVALID_REF; 1513 mfn = gnttab_end_foreign_transfer_ref(ref); 1514 ASSERT(mfn != MFN_INVALID); 1515 ASSERT(hat_getpfnum(kas.a_hat, bdesc->buf) == 1516 PFN_INVALID); 1517 gnttab_release_grant_reference(&xnfp->gref_rx_head, 1518 ref); 1519 reassign_pfn(xnf_btop(bdesc->buf_phys), mfn); 1520 
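/*
 * Page-flip receive: the backend transferred ownership of the machine
 * page holding the packet. Wire the new mfn under this buffer's pfn,
 * re-create the kernel mapping for bdesc->buf and account for the page
 * the domain has just gained.
 */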
hat_devload(kas.a_hat, bdesc->buf, PAGESIZE,
1521 xnf_btop(bdesc->buf_phys),
1522 PROT_READ | PROT_WRITE, HAT_LOAD);
1523 balloon_drv_added(1);
1524 if (rxpkt->flags & NETRXF_data_validated)
1525 hwcsum = B_TRUE;
1526 if (len <= xnf_rx_bcopy_thresh) {
1527 /*
1528 * For small buffers, just copy the data
1529 * and send the copy upstream.
1530 */
1531 new_bdesc = NULL;
1532 } else {
1533 /*
1534 * We send a pointer to this data upstream;
1535 * we need a new buffer to replace this one.
1536 */
1537 mutex_enter(&xnfp->rx_buf_mutex);
1538 new_bdesc = xnf_get_buffer(xnfp);
1539 if (new_bdesc != NULL) {
1540 xnfp->rx_bufs_outstanding++;
1541 } else {
1542 xnfp->stat_rx_no_ringbuf++;
1543 }
1544 mutex_exit(&xnfp->rx_buf_mutex);
1545 }
1546
1547 if (new_bdesc == NULL) {
1548 /*
1549 * Don't have a new ring buffer; bcopy the data
1550 * from the buffer, and preserve the
1551 * original buffer
1552 */
1553 if ((mp = allocb(len, BPRI_MED)) == NULL) {
1554 /*
1555 * Couldn't get a buffer to copy to;
1556 * drop this data, and re-hang
1557 * the buffer on the ring.
1558 */
1559 xnfp->stat_norcvbuf++;
1560 } else {
1561 bcopy(bdesc->buf + off, mp->b_wptr,
1562 len);
1563 }
1564 /*
1565 * Give the buffer page back to xen
1566 */
1567 pfn = xnf_btop(bdesc->buf_phys);
1568 cnt = balloon_free_pages(1, &mfn, bdesc->buf,
1569 &pfn);
1570 if (cnt != 1) {
1571 cmn_err(CE_WARN, "unable to give a "
1572 "page back to the hypervisor\n");
1573 }
1574 new_bdesc = bdesc;
1575 } else {
1576 if ((mp = desballoc((unsigned char *)bdesc->buf,
1577 off + len, 0, (frtn_t *)bdesc)) == NULL) {
1578 /*
1579 * Couldn't get mblk to pass recv data
1580 * up with, free the old ring buffer
1581 */
1582 xnfp->stat_norcvbuf++;
1583 xnf_rcv_complete(bdesc);
1584 goto luckless;
1585 }
1586 (void) ddi_dma_sync(bdesc->dma_handle,
1587 0, 0, DDI_DMA_SYNC_FORCPU);
1588
1589 mp->b_wptr += off;
1590 mp->b_rptr += off;
1591 }
1592 luckless:
1593 if (mp)
1594 mp->b_wptr += len;
1595 /* re-hang old or hang new buffer */
1596 rx_buffer_hang(xnfp, new_bdesc);
1597 }
1598 if (mp) {
1599 if (hwcsum) {
1600 /*
1601 * If the peer says that the data has
1602 * been validated then we declare that
1603 * the full checksum has been
1604 * verified.
1605 *
1606 * We don't look at the "checksum
1607 * blank" flag, and hence could have a
1608 * packet here that we are asserting
1609 * is good with a blank checksum.
1610 *
1611 * The hardware checksum offload
1612 * specification says that we must
1613 * provide the actual checksum as well
1614 * as an assertion that it is valid,
1615 * but the protocol stack doesn't
1616 * actually use it and some other
1617 * drivers don't bother, so we don't.
1618 * If it was necessary we could grovel
1619 * in the packet to find it.
1620 */
1621
1622 (void) hcksum_assoc(mp, NULL,
1623 NULL, 0, 0, 0, 0,
1624 HCK_FULLCKSUM |
1625 HCK_FULLCKSUM_OK,
1626 0);
1627 xnfp->stat_rx_cksum_no_need++;
1628 }
1629 if (head == NULL) {
1630 head = tail = mp;
1631 } else {
1632 tail->b_next = mp;
1633 tail = mp;
1634 }
1635
1636 ASSERT(mp->b_next == NULL);
1637
1638 xnfp->stat_ipackets++;
1639 xnfp->stat_rbytes += len;
1640 }
1641
1642 xnfp->rx_ring.rsp_cons++;
1643 }
1644
1645 /*
1646 * Has more data come in since we started?
1647 */
1648 /* LINTED: constant in conditional context */
1649 RING_FINAL_CHECK_FOR_RESPONSES(&xnfp->rx_ring, work_to_do);
1650 if (work_to_do)
1651 goto loop;
1652
1653 /*
1654 * Indicate to the backend that we have re-filled the receive
1655 * ring.
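 *
 * RING_PUSH_REQUESTS_AND_CHECK_NOTIFY() publishes req_prod_pvt to the
 * shared ring and sets 'notify' only when the backend may actually be
 * waiting for more requests, so the event channel is not kicked on
 * every pass through this routine.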
1656 */ 1657 /* LINTED: constant in conditional context */ 1658 RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&xnfp->rx_ring, notify); 1659 if (notify) 1660 ec_notify_via_evtchn(xnfp->evtchn); 1661 1662 return (head); 1663 } 1664 1665 /* Called when the upper layers free a message we passed upstream */ 1666 static void 1667 xnf_rcv_complete(struct xnf_buffer_desc *bdesc) 1668 { 1669 xnf_t *xnfp = bdesc->xnfp; 1670 pfn_t pfn; 1671 long cnt; 1672 1673 /* One less outstanding receive buffer */ 1674 mutex_enter(&xnfp->rx_buf_mutex); 1675 --xnfp->rx_bufs_outstanding; 1676 /* 1677 * Return buffer to the free list, unless the free list is getting 1678 * too large. XXX - this threshold may need tuning. 1679 */ 1680 if (xnfp->rx_descs_free < xnf_recv_bufs_lowat) { 1681 /* 1682 * Unmap the page, and hand the machine page back 1683 * to xen so it can be re-used as a backend net buffer. 1684 */ 1685 pfn = xnf_btop(bdesc->buf_phys); 1686 cnt = balloon_free_pages(1, NULL, bdesc->buf, &pfn); 1687 if (cnt != 1) { 1688 cmn_err(CE_WARN, "unable to give a page back to the " 1689 "hypervisor\n"); 1690 } 1691 1692 bdesc->next = xnfp->free_list; 1693 xnfp->free_list = bdesc; 1694 xnfp->rx_descs_free++; 1695 mutex_exit(&xnfp->rx_buf_mutex); 1696 } else { 1697 /* 1698 * We can return everything here since we have a free buffer 1699 * that we have not given the backing page for back to xen. 1700 */ 1701 --xnfp->recv_buffer_count; 1702 mutex_exit(&xnfp->rx_buf_mutex); 1703 (void) ddi_dma_unbind_handle(bdesc->dma_handle); 1704 ddi_dma_mem_free(&bdesc->acc_handle); 1705 ddi_dma_free_handle(&bdesc->dma_handle); 1706 kmem_free(bdesc, sizeof (*bdesc)); 1707 } 1708 } 1709 1710 /* 1711 * xnf_alloc_dma_resources() -- initialize the drivers structures 1712 */ 1713 static int 1714 xnf_alloc_dma_resources(xnf_t *xnfp) 1715 { 1716 dev_info_t *devinfo = xnfp->devinfo; 1717 int i; 1718 size_t len; 1719 ddi_dma_cookie_t dma_cookie; 1720 uint_t ncookies; 1721 struct xnf_buffer_desc *bdesc; 1722 int rc; 1723 caddr_t rptr; 1724 1725 xnfp->n_recvs = NET_RX_RING_SIZE; 1726 xnfp->max_recv_bufs = xnf_recv_bufs_hiwat; 1727 1728 xnfp->n_xmits = NET_TX_RING_SIZE; 1729 1730 /* 1731 * The code below allocates all the DMA data structures that 1732 * need to be released when the driver is detached. 1733 * 1734 * First allocate handles for mapping (virtual address) pointers to 1735 * transmit data buffers to physical addresses 1736 */ 1737 for (i = 0; i < xnfp->n_xmits; i++) { 1738 if ((rc = ddi_dma_alloc_handle(devinfo, 1739 &tx_buffer_dma_attr, DDI_DMA_SLEEP, 0, 1740 &xnfp->tx_pkt_info[i].dma_handle)) != DDI_SUCCESS) 1741 return (DDI_FAILURE); 1742 } 1743 1744 /* 1745 * Allocate page for the transmit descriptor ring. 
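 *
 * Each descriptor ring is a single page of DMA-able memory: the page
 * is zeroed, initialised as a netif shared ring, bound to our front
 * ring state with FRONT_RING_INIT() and its physical address recorded
 * so that xnf_setup_rings() can grant the backend access to it.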
1746 */ 1747 if (ddi_dma_alloc_handle(devinfo, &ringbuf_dma_attr, 1748 DDI_DMA_SLEEP, 0, &xnfp->tx_ring_dma_handle) != DDI_SUCCESS) 1749 goto alloc_error; 1750 1751 if (ddi_dma_mem_alloc(xnfp->tx_ring_dma_handle, 1752 PAGESIZE, &accattr, DDI_DMA_CONSISTENT, 1753 DDI_DMA_SLEEP, 0, &rptr, &len, 1754 &xnfp->tx_ring_dma_acchandle) != DDI_SUCCESS) { 1755 ddi_dma_free_handle(&xnfp->tx_ring_dma_handle); 1756 xnfp->tx_ring_dma_handle = NULL; 1757 goto alloc_error; 1758 } 1759 1760 if ((rc = ddi_dma_addr_bind_handle(xnfp->tx_ring_dma_handle, NULL, 1761 rptr, PAGESIZE, DDI_DMA_RDWR | DDI_DMA_CONSISTENT, 1762 DDI_DMA_SLEEP, 0, &dma_cookie, &ncookies)) != DDI_DMA_MAPPED) { 1763 ddi_dma_mem_free(&xnfp->tx_ring_dma_acchandle); 1764 ddi_dma_free_handle(&xnfp->tx_ring_dma_handle); 1765 xnfp->tx_ring_dma_handle = NULL; 1766 xnfp->tx_ring_dma_acchandle = NULL; 1767 if (rc == DDI_DMA_NORESOURCES) 1768 goto alloc_error; 1769 else 1770 goto error; 1771 } 1772 1773 ASSERT(ncookies == 1); 1774 bzero(rptr, PAGESIZE); 1775 /* LINTED: constant in conditional context */ 1776 SHARED_RING_INIT((netif_tx_sring_t *)rptr); 1777 /* LINTED: constant in conditional context */ 1778 FRONT_RING_INIT(&xnfp->tx_ring, (netif_tx_sring_t *)rptr, PAGESIZE); 1779 xnfp->tx_ring_phys_addr = dma_cookie.dmac_laddress; 1780 1781 /* 1782 * Allocate page for the receive descriptor ring. 1783 */ 1784 if (ddi_dma_alloc_handle(devinfo, &ringbuf_dma_attr, 1785 DDI_DMA_SLEEP, 0, &xnfp->rx_ring_dma_handle) != DDI_SUCCESS) 1786 goto alloc_error; 1787 1788 if (ddi_dma_mem_alloc(xnfp->rx_ring_dma_handle, 1789 PAGESIZE, &accattr, DDI_DMA_CONSISTENT, 1790 DDI_DMA_SLEEP, 0, &rptr, &len, 1791 &xnfp->rx_ring_dma_acchandle) != DDI_SUCCESS) { 1792 ddi_dma_free_handle(&xnfp->rx_ring_dma_handle); 1793 xnfp->rx_ring_dma_handle = NULL; 1794 goto alloc_error; 1795 } 1796 1797 if ((rc = ddi_dma_addr_bind_handle(xnfp->rx_ring_dma_handle, NULL, 1798 rptr, PAGESIZE, DDI_DMA_RDWR | DDI_DMA_CONSISTENT, 1799 DDI_DMA_SLEEP, 0, &dma_cookie, &ncookies)) != DDI_DMA_MAPPED) { 1800 ddi_dma_mem_free(&xnfp->rx_ring_dma_acchandle); 1801 ddi_dma_free_handle(&xnfp->rx_ring_dma_handle); 1802 xnfp->rx_ring_dma_handle = NULL; 1803 xnfp->rx_ring_dma_acchandle = NULL; 1804 if (rc == DDI_DMA_NORESOURCES) 1805 goto alloc_error; 1806 else 1807 goto error; 1808 } 1809 1810 ASSERT(ncookies == 1); 1811 bzero(rptr, PAGESIZE); 1812 /* LINTED: constant in conditional context */ 1813 SHARED_RING_INIT((netif_rx_sring_t *)rptr); 1814 /* LINTED: constant in conditional context */ 1815 FRONT_RING_INIT(&xnfp->rx_ring, (netif_rx_sring_t *)rptr, PAGESIZE); 1816 xnfp->rx_ring_phys_addr = dma_cookie.dmac_laddress; 1817 1818 /* 1819 * Preallocate receive buffers for each receive descriptor. 1820 */ 1821 1822 /* Set up the "free list" of receive buffer descriptors */ 1823 for (i = 0; i < xnfp->n_recvs; i++) { 1824 if ((bdesc = xnf_alloc_buffer(xnfp)) == NULL) 1825 goto alloc_error; 1826 bdesc->next = xnfp->free_list; 1827 xnfp->free_list = bdesc; 1828 } 1829 1830 return (DDI_SUCCESS); 1831 1832 alloc_error: 1833 cmn_err(CE_WARN, "xnf%d: could not allocate enough DMA memory", 1834 ddi_get_instance(xnfp->devinfo)); 1835 error: 1836 xnf_release_dma_resources(xnfp); 1837 return (DDI_FAILURE); 1838 } 1839 1840 /* 1841 * Release all DMA resources in the opposite order from acquisition 1842 * Should not be called until all outstanding esballoc buffers 1843 * have been returned. 
1844 */ 1845 static void 1846 xnf_release_dma_resources(xnf_t *xnfp) 1847 { 1848 int i; 1849 1850 /* 1851 * Free receive buffers which are currently associated with 1852 * descriptors 1853 */ 1854 for (i = 0; i < xnfp->n_recvs; i++) { 1855 struct xnf_buffer_desc *bp; 1856 1857 if ((bp = xnfp->rxpkt_bufptr[i]) == NULL) 1858 continue; 1859 xnf_free_buffer(bp); 1860 xnfp->rxpkt_bufptr[i] = NULL; 1861 } 1862 1863 /* Free the receive ring buffer */ 1864 if (xnfp->rx_ring_dma_acchandle != NULL) { 1865 (void) ddi_dma_unbind_handle(xnfp->rx_ring_dma_handle); 1866 ddi_dma_mem_free(&xnfp->rx_ring_dma_acchandle); 1867 ddi_dma_free_handle(&xnfp->rx_ring_dma_handle); 1868 xnfp->rx_ring_dma_acchandle = NULL; 1869 } 1870 /* Free the transmit ring buffer */ 1871 if (xnfp->tx_ring_dma_acchandle != NULL) { 1872 (void) ddi_dma_unbind_handle(xnfp->tx_ring_dma_handle); 1873 ddi_dma_mem_free(&xnfp->tx_ring_dma_acchandle); 1874 ddi_dma_free_handle(&xnfp->tx_ring_dma_handle); 1875 xnfp->tx_ring_dma_acchandle = NULL; 1876 } 1877 } 1878 1879 static void 1880 xnf_release_mblks(xnf_t *xnfp) 1881 { 1882 int i; 1883 1884 for (i = 0; i < xnfp->n_xmits; i++) { 1885 if (xnfp->tx_pkt_info[i].mp == NULL) 1886 continue; 1887 freemsg(xnfp->tx_pkt_info[i].mp); 1888 xnfp->tx_pkt_info[i].mp = NULL; 1889 (void) ddi_dma_unbind_handle(xnfp->tx_pkt_info[i].dma_handle); 1890 } 1891 } 1892 1893 /* 1894 * Remove a xmit buffer descriptor from the head of the free list and return 1895 * a pointer to it. If no buffers on list, attempt to allocate a new one. 1896 * Called with the tx_buf_mutex held. 1897 */ 1898 static struct xnf_buffer_desc * 1899 xnf_get_xmit_buffer(xnf_t *xnfp) 1900 { 1901 struct xnf_buffer_desc *bdesc; 1902 1903 bdesc = xnfp->xmit_free_list; 1904 if (bdesc != NULL) { 1905 xnfp->xmit_free_list = bdesc->next; 1906 } else { 1907 bdesc = xnf_alloc_xmit_buffer(xnfp); 1908 } 1909 return (bdesc); 1910 } 1911 1912 /* 1913 * Remove a buffer descriptor from the head of the free list and return 1914 * a pointer to it. If no buffers on list, attempt to allocate a new one. 1915 * Called with the rx_buf_mutex held. 1916 */ 1917 static struct xnf_buffer_desc * 1918 xnf_get_buffer(xnf_t *xnfp) 1919 { 1920 struct xnf_buffer_desc *bdesc; 1921 1922 bdesc = xnfp->free_list; 1923 if (bdesc != NULL) { 1924 xnfp->free_list = bdesc->next; 1925 xnfp->rx_descs_free--; 1926 } else { 1927 bdesc = xnf_alloc_buffer(xnfp); 1928 } 1929 return (bdesc); 1930 } 1931 1932 /* 1933 * Free a xmit buffer back to the xmit free list 1934 */ 1935 static void 1936 xnf_free_xmit_buffer(struct xnf_buffer_desc *bp) 1937 { 1938 xnf_t *xnfp = bp->xnfp; 1939 1940 mutex_enter(&xnfp->tx_buf_mutex); 1941 bp->next = xnfp->xmit_free_list; 1942 xnfp->xmit_free_list = bp; 1943 mutex_exit(&xnfp->tx_buf_mutex); 1944 } 1945 1946 /* 1947 * Put a buffer descriptor onto the head of the free list. 1948 * We can't really free these buffers back to the kernel 1949 * since we have given away their backing page to be used 1950 * by the back end net driver. 1951 */ 1952 static void 1953 xnf_free_buffer(struct xnf_buffer_desc *bp) 1954 { 1955 xnf_t *xnfp = bp->xnfp; 1956 1957 mutex_enter(&xnfp->rx_buf_mutex); 1958 bp->next = xnfp->free_list; 1959 xnfp->free_list = bp; 1960 xnfp->rx_descs_free++; 1961 mutex_exit(&xnfp->rx_buf_mutex); 1962 } 1963 1964 /* 1965 * Allocate a DMA-able xmit buffer, including a structure to 1966 * keep track of the buffer. Called with tx_buf_mutex held. 
1967 */
1968 static struct xnf_buffer_desc *
1969 xnf_alloc_xmit_buffer(xnf_t *xnfp)
1970 {
1971 struct xnf_buffer_desc *bdesc;
1972 size_t len;
1973
1974 if ((bdesc = kmem_zalloc(sizeof (*bdesc), KM_NOSLEEP)) == NULL)
1975 return (NULL);
1976
1977 /* Allocate a DMA access handle for the transmit buffer */
1978 if (ddi_dma_alloc_handle(xnfp->devinfo, &tx_buffer_dma_attr,
1979 0, 0, &bdesc->dma_handle) != DDI_SUCCESS)
1980 goto failure;
1981
1982 /* Allocate DMA-able memory for transmit buffer */
1983 if (ddi_dma_mem_alloc(bdesc->dma_handle,
1984 PAGESIZE, &data_accattr, DDI_DMA_STREAMING, 0, 0,
1985 &bdesc->buf, &len, &bdesc->acc_handle) != DDI_SUCCESS)
1986 goto late_failure;
1987
1988 bdesc->xnfp = xnfp;
1989 xnfp->xmit_buffer_count++;
1990
1991 return (bdesc);
1992
1993 late_failure:
1994 ddi_dma_free_handle(&bdesc->dma_handle);
1995
1996 failure:
1997 kmem_free(bdesc, sizeof (*bdesc));
1998 return (NULL);
1999 }
2000
2001 /*
2002 * Allocate a DMA-able receive buffer, including a structure to
2003 * keep track of the buffer. Called with rx_buf_mutex held.
2004 */
2005 static struct xnf_buffer_desc *
2006 xnf_alloc_buffer(xnf_t *xnfp)
2007 {
2008 struct xnf_buffer_desc *bdesc;
2009 size_t len;
2010 uint_t ncookies;
2011 ddi_dma_cookie_t dma_cookie;
2012 long cnt;
2013 pfn_t pfn;
2014
2015 if (xnfp->recv_buffer_count >= xnfp->max_recv_bufs)
2016 return (NULL);
2017
2018 if ((bdesc = kmem_zalloc(sizeof (*bdesc), KM_NOSLEEP)) == NULL)
2019 return (NULL);
2020
2021 /* Allocate a DMA access handle for the receive buffer */
2022 if (ddi_dma_alloc_handle(xnfp->devinfo, &rx_buffer_dma_attr,
2023 0, 0, &bdesc->dma_handle) != DDI_SUCCESS)
2024 goto failure;
2025
2026 /* Allocate DMA-able memory for receive buffer */
2027 if (ddi_dma_mem_alloc(bdesc->dma_handle,
2028 PAGESIZE, &data_accattr, DDI_DMA_STREAMING, 0, 0,
2029 &bdesc->buf, &len, &bdesc->acc_handle) != DDI_SUCCESS)
2030 goto late_failure;
2031
2032 /* bind to virtual address of buffer to get physical address */
2033 if (ddi_dma_addr_bind_handle(bdesc->dma_handle, NULL,
2034 bdesc->buf, PAGESIZE, DDI_DMA_READ | DDI_DMA_STREAMING,
2035 DDI_DMA_SLEEP, 0, &dma_cookie, &ncookies) != DDI_DMA_MAPPED)
2036 goto late_late_failure;
2037
2038 bdesc->buf_phys = dma_cookie.dmac_laddress;
2039 bdesc->xnfp = xnfp;
2040 bdesc->free_rtn.free_func = xnf_rcv_complete;
2041 bdesc->free_rtn.free_arg = (char *)bdesc;
2042 bdesc->grant_ref = GRANT_INVALID_REF;
2043 ASSERT(ncookies == 1);
2044
2045 xnfp->recv_buffer_count++;
2046 /*
2047 * Unmap the page, and hand the machine page back
2048 * to xen so it can be used as a backend net buffer.
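 *
 * This keeps the domain's page accounting balanced: every receive
 * buffer posted to the backend surrenders its machine page via
 * balloon_free_pages(), and each page-flipped packet that arrives
 * brings a page back, recorded with balloon_drv_added(1) in
 * xnf_process_recv().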
2049 */ 2050 pfn = xnf_btop(bdesc->buf_phys); 2051 cnt = balloon_free_pages(1, NULL, bdesc->buf, &pfn); 2052 if (cnt != 1) { 2053 cmn_err(CE_WARN, "unable to give a page back to the " 2054 "hypervisor\n"); 2055 } 2056 2057 return (bdesc); 2058 2059 late_late_failure: 2060 ddi_dma_mem_free(&bdesc->acc_handle); 2061 2062 late_failure: 2063 ddi_dma_free_handle(&bdesc->dma_handle); 2064 2065 failure: 2066 kmem_free(bdesc, sizeof (*bdesc)); 2067 return (NULL); 2068 } 2069 2070 static int 2071 xnf_stat(void *arg, uint_t stat, uint64_t *val) 2072 { 2073 xnf_t *xnfp = arg; 2074 2075 mutex_enter(&xnfp->intrlock); 2076 mutex_enter(&xnfp->txlock); 2077 2078 #define map_stat(q, r) \ 2079 case (MAC_STAT_##q): \ 2080 *val = xnfp->stat_##r; \ 2081 break 2082 2083 switch (stat) { 2084 2085 map_stat(IPACKETS, ipackets); 2086 map_stat(OPACKETS, opackets); 2087 map_stat(RBYTES, rbytes); 2088 map_stat(OBYTES, obytes); 2089 map_stat(NORCVBUF, norcvbuf); 2090 map_stat(IERRORS, errrcv); 2091 map_stat(NOXMTBUF, xmit_defer); 2092 2093 default: 2094 mutex_exit(&xnfp->txlock); 2095 mutex_exit(&xnfp->intrlock); 2096 2097 return (ENOTSUP); 2098 } 2099 2100 #undef map_stat 2101 2102 mutex_exit(&xnfp->txlock); 2103 mutex_exit(&xnfp->intrlock); 2104 2105 return (0); 2106 } 2107 2108 /*ARGSUSED*/ 2109 static void 2110 xnf_blank(void *arg, time_t ticks, uint_t count) 2111 { 2112 /* 2113 * XXPV dme: blanking is not currently implemented. 2114 * 2115 * It's not obvious how to use the 'ticks' argument here. 2116 * 2117 * 'Count' might be used as an indicator of how to set 2118 * rsp_event when posting receive buffers to the rx_ring. It 2119 * would replace the code at the tail of xnf_process_recv() 2120 * that simply indicates that the next completed packet should 2121 * cause an interrupt. 2122 */ 2123 } 2124 2125 static void 2126 xnf_resources(void *arg) 2127 { 2128 xnf_t *xnfp = arg; 2129 mac_rx_fifo_t mrf; 2130 2131 mrf.mrf_type = MAC_RX_FIFO; 2132 mrf.mrf_blank = xnf_blank; 2133 mrf.mrf_arg = (void *)xnfp; 2134 mrf.mrf_normal_blank_time = 128; /* XXPV dme: see xnf_blank() */ 2135 mrf.mrf_normal_pkt_count = 8; /* XXPV dme: see xnf_blank() */ 2136 2137 xnfp->rx_handle = mac_resource_add(xnfp->mh, 2138 (mac_resource_t *)&mrf); 2139 } 2140 2141 /*ARGSUSED*/ 2142 static void 2143 xnf_ioctl(void *arg, queue_t *q, mblk_t *mp) 2144 { 2145 miocnak(q, mp, 0, EINVAL); 2146 } 2147 2148 static boolean_t 2149 xnf_getcapab(void *arg, mac_capab_t cap, void *cap_data) 2150 { 2151 xnf_t *xnfp = arg; 2152 2153 switch (cap) { 2154 case MAC_CAPAB_HCKSUM: { 2155 uint32_t *capab = cap_data; 2156 2157 /* 2158 * We declare ourselves capable of HCKSUM_INET_PARTIAL 2159 * in order that the protocol stack insert the 2160 * pseudo-header checksum in packets that it passes 2161 * down to us. 2162 * 2163 * Whilst the flag used to communicate with dom0 is 2164 * called "NETTXF_csum_blank", the checksum in the 2165 * packet must contain the pseudo-header checksum and 2166 * not zero. (In fact, a Solaris dom0 is happy to deal 2167 * with a checksum of zero, but a Linux dom0 is not.) 2168 */ 2169 if (xnfp->cksum_offload) 2170 *capab = HCKSUM_INET_PARTIAL; 2171 else 2172 *capab = 0; 2173 break; 2174 } 2175 2176 case MAC_CAPAB_POLL: 2177 /* Just return B_TRUE. 
*/ 2178 break; 2179 2180 default: 2181 return (B_FALSE); 2182 } 2183 2184 return (B_TRUE); 2185 } 2186 2187 /*ARGSUSED*/ 2188 static void 2189 oe_state_change(dev_info_t *dip, ddi_eventcookie_t id, 2190 void *arg, void *impl_data) 2191 { 2192 xnf_t *xnfp = ddi_get_driver_private(dip); 2193 XenbusState new_state = *(XenbusState *)impl_data; 2194 2195 ASSERT(xnfp != NULL); 2196 2197 switch (new_state) { 2198 case XenbusStateConnected: 2199 mutex_enter(&xnfp->intrlock); 2200 mutex_enter(&xnfp->txlock); 2201 2202 xnfp->connected = B_TRUE; 2203 cv_broadcast(&xnfp->cv); 2204 2205 mutex_exit(&xnfp->txlock); 2206 mutex_exit(&xnfp->intrlock); 2207 2208 ec_notify_via_evtchn(xnfp->evtchn); 2209 break; 2210 2211 default: 2212 break; 2213 } 2214 } 2215