1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 /* 30 * 31 * Copyright (c) 2004 Christian Limpach. 32 * All rights reserved. 33 * 34 * Redistribution and use in source and binary forms, with or without 35 * modification, are permitted provided that the following conditions 36 * are met: 37 * 1. Redistributions of source code must retain the above copyright 38 * notice, this list of conditions and the following disclaimer. 39 * 2. Redistributions in binary form must reproduce the above copyright 40 * notice, this list of conditions and the following disclaimer in the 41 * documentation and/or other materials provided with the distribution. 42 * 3. This section intentionally left blank. 43 * 4. The name of the author may not be used to endorse or promote products 44 * derived from this software without specific prior written permission. 45 * 46 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 47 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 48 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 49 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 50 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 51 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 52 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 53 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 54 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 55 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 56 */ 57 /* 58 * Section 3 of the above license was updated in response to bug 6379571. 
 */

/*
 * xnf.c - Nemo-based network driver for domU
 */

#include <sys/types.h>
#include <sys/hypervisor.h>
#include <sys/debug.h>
#include <sys/errno.h>
#include <sys/param.h>
#include <sys/sysmacros.h>
#include <sys/systm.h>
#include <sys/stropts.h>
#include <sys/stream.h>
#include <sys/strsubr.h>
#include <sys/kmem.h>
#include <sys/conf.h>
#include <sys/ddi.h>
#include <sys/devops.h>
#include <sys/sunddi.h>
#include <sys/sunndi.h>
#include <sys/ksynch.h>
#include <sys/dlpi.h>
#include <sys/ethernet.h>
#include <sys/strsun.h>
#include <sys/pattr.h>
#include <inet/common.h>
#include <inet/ip.h>
#include <sys/stat.h>
#include <sys/modctl.h>
#include <sys/mac.h>
#include <sys/mac_ether.h>
#include <sys/atomic.h>
#include <sys/machsystm.h>
#include <sys/bootconf.h>
#include <sys/bootsvcs.h>
#include <sys/bootinfo.h>
#include <sys/promif.h>
#include <sys/archsystm.h>
#include <sys/gnttab.h>
#include <sys/mach_mmu.h>
#include <xen/public/memory.h>

#include "xnf.h"

#include <sys/evtchn_impl.h>
#include <sys/balloon_impl.h>
#include <xen/sys/xendev.h>

/*
 * Declarations and Module Linkage
 */

#define	IDENT	"Virtual Ethernet driver"

#if defined(DEBUG) || defined(__lint)
#define	XNF_DEBUG
int	xnfdebug = 0;
#endif

/*
 * On a 32-bit PAE system physical and machine addresses are larger
 * than 32 bits.  ddi_btop() on such systems takes an unsigned long
 * argument, and so addresses above 4G are truncated before ddi_btop()
 * gets to see them.  To avoid this, code the shift operation here.
 */
#define	xnf_btop(addr)	((addr) >> PAGESHIFT)

boolean_t	xnf_cksum_offload = B_TRUE;
/*
 * Should pages used for transmit be readonly for the peer?
 */
boolean_t	xnf_tx_pages_readonly = B_FALSE;
/*
 * Packets under this size are bcopied instead of using desballoc.
 * Choose a value > XNF_FRAMESIZE (1514) to force the receive path to
 * always copy.
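 *
 * For illustration, a rough sketch of how xnf_process_recv() applies
 * this threshold (see the real code below for the details):
 *
 *	if (len <= xnf_rx_bcopy_thresh)
 *		copy the data with allocb()/bcopy() and keep our buffer;
 *	else
 *		loan the buffer upstream with desballoc() and replace it.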
138 */ 139 unsigned int xnf_rx_bcopy_thresh = 64; 140 141 unsigned int xnf_max_tx_frags = 1; 142 143 /* Required system entry points */ 144 static int xnf_attach(dev_info_t *, ddi_attach_cmd_t); 145 static int xnf_detach(dev_info_t *, ddi_detach_cmd_t); 146 147 /* Required driver entry points for Nemo */ 148 static int xnf_start(void *); 149 static void xnf_stop(void *); 150 static int xnf_set_mac_addr(void *, const uint8_t *); 151 static int xnf_set_multicast(void *, boolean_t, const uint8_t *); 152 static int xnf_set_promiscuous(void *, boolean_t); 153 static mblk_t *xnf_send(void *, mblk_t *); 154 static uint_t xnf_intr(caddr_t); 155 static int xnf_stat(void *, uint_t, uint64_t *); 156 static void xnf_blank(void *, time_t, uint_t); 157 static void xnf_resources(void *); 158 static void xnf_ioctl(void *, queue_t *, mblk_t *); 159 static boolean_t xnf_getcapab(void *, mac_capab_t, void *); 160 161 /* Driver private functions */ 162 static int xnf_alloc_dma_resources(xnf_t *); 163 static void xnf_release_dma_resources(xnf_t *); 164 static mblk_t *xnf_process_recv(xnf_t *); 165 static void xnf_rcv_complete(struct xnf_buffer_desc *); 166 static void xnf_release_mblks(xnf_t *); 167 static struct xnf_buffer_desc *xnf_alloc_xmit_buffer(xnf_t *); 168 static struct xnf_buffer_desc *xnf_alloc_buffer(xnf_t *); 169 static struct xnf_buffer_desc *xnf_get_xmit_buffer(xnf_t *); 170 static struct xnf_buffer_desc *xnf_get_buffer(xnf_t *); 171 static void xnf_free_buffer(struct xnf_buffer_desc *); 172 static void xnf_free_xmit_buffer(struct xnf_buffer_desc *); 173 void xnf_send_driver_status(int, int); 174 static void rx_buffer_hang(xnf_t *, struct xnf_buffer_desc *); 175 static int xnf_clean_tx_ring(xnf_t *); 176 static void oe_state_change(dev_info_t *, ddi_eventcookie_t, 177 void *, void *); 178 179 /* 180 * XXPV dme: remove MC_IOCTL? 
181 */ 182 static mac_callbacks_t xnf_callbacks = { 183 MC_RESOURCES | MC_IOCTL | MC_GETCAPAB, 184 xnf_stat, 185 xnf_start, 186 xnf_stop, 187 xnf_set_promiscuous, 188 xnf_set_multicast, 189 xnf_set_mac_addr, 190 xnf_send, 191 xnf_resources, 192 xnf_ioctl, 193 xnf_getcapab 194 }; 195 196 #define GRANT_INVALID_REF 0 197 int xnf_recv_bufs_lowat = 4 * NET_RX_RING_SIZE; 198 int xnf_recv_bufs_hiwat = 8 * NET_RX_RING_SIZE; /* default max */ 199 200 /* DMA attributes for network ring buffer */ 201 static ddi_dma_attr_t ringbuf_dma_attr = { 202 DMA_ATTR_V0, /* version of this structure */ 203 0, /* lowest usable address */ 204 0xffffffffffffffffULL, /* highest usable address */ 205 0x7fffffff, /* maximum DMAable byte count */ 206 MMU_PAGESIZE, /* alignment in bytes */ 207 0x7ff, /* bitmap of burst sizes */ 208 1, /* minimum transfer */ 209 0xffffffffU, /* maximum transfer */ 210 0xffffffffffffffffULL, /* maximum segment length */ 211 1, /* maximum number of segments */ 212 1, /* granularity */ 213 0, /* flags (reserved) */ 214 }; 215 216 /* DMA attributes for transmit data */ 217 static ddi_dma_attr_t tx_buffer_dma_attr = { 218 DMA_ATTR_V0, /* version of this structure */ 219 0, /* lowest usable address */ 220 0xffffffffffffffffULL, /* highest usable address */ 221 0x7fffffff, /* maximum DMAable byte count */ 222 MMU_PAGESIZE, /* alignment in bytes */ 223 0x7ff, /* bitmap of burst sizes */ 224 1, /* minimum transfer */ 225 0xffffffffU, /* maximum transfer */ 226 0xffffffffffffffffULL, /* maximum segment length */ 227 1, /* maximum number of segments */ 228 1, /* granularity */ 229 0, /* flags (reserved) */ 230 }; 231 232 /* DMA attributes for a receive buffer */ 233 static ddi_dma_attr_t rx_buffer_dma_attr = { 234 DMA_ATTR_V0, /* version of this structure */ 235 0, /* lowest usable address */ 236 0xffffffffffffffffULL, /* highest usable address */ 237 0x7fffffff, /* maximum DMAable byte count */ 238 MMU_PAGESIZE, /* alignment in bytes */ 239 0x7ff, /* bitmap of burst sizes */ 240 1, /* minimum transfer */ 241 0xffffffffU, /* maximum transfer */ 242 0xffffffffffffffffULL, /* maximum segment length */ 243 1, /* maximum number of segments */ 244 1, /* granularity */ 245 0, /* flags (reserved) */ 246 }; 247 248 /* DMA access attributes for registers and descriptors */ 249 static ddi_device_acc_attr_t accattr = { 250 DDI_DEVICE_ATTR_V0, 251 DDI_STRUCTURE_LE_ACC, /* This is a little-endian device */ 252 DDI_STRICTORDER_ACC 253 }; 254 255 /* DMA access attributes for data: NOT to be byte swapped. */ 256 static ddi_device_acc_attr_t data_accattr = { 257 DDI_DEVICE_ATTR_V0, 258 DDI_NEVERSWAP_ACC, 259 DDI_STRICTORDER_ACC 260 }; 261 262 unsigned char xnf_broadcastaddr[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; 263 int xnf_diagnose = 0; /* Patchable global for diagnostic purposes */ 264 265 DDI_DEFINE_STREAM_OPS(xnf_dev_ops, nulldev, nulldev, xnf_attach, xnf_detach, 266 nodev, NULL, D_MP, NULL); 267 268 static struct modldrv xnf_modldrv = { 269 &mod_driverops, /* Type of module. 
This one is a driver */ 270 IDENT " %I%", /* short description */ 271 &xnf_dev_ops /* driver specific ops */ 272 }; 273 274 static struct modlinkage modlinkage = { 275 MODREV_1, &xnf_modldrv, NULL 276 }; 277 278 int 279 _init(void) 280 { 281 int r; 282 283 mac_init_ops(&xnf_dev_ops, "xnf"); 284 r = mod_install(&modlinkage); 285 if (r != DDI_SUCCESS) 286 mac_fini_ops(&xnf_dev_ops); 287 288 return (r); 289 } 290 291 int 292 _fini(void) 293 { 294 return (EBUSY); /* XXPV dme: should be removable */ 295 } 296 297 int 298 _info(struct modinfo *modinfop) 299 { 300 return (mod_info(&modlinkage, modinfop)); 301 } 302 303 /* 304 * Statistics. 305 */ 306 /* XXPV: most of these names need re-"nice"ing */ 307 static char *xnf_aux_statistics[] = { 308 "tx_cksum_deferred", 309 "rx_cksum_no_need", 310 "intr", 311 "xmit_pullup", 312 "xmit_pagebndry", 313 "xmit_attempt", 314 "rx_no_ringbuf", 315 "mac_rcv_error", 316 "runt", 317 }; 318 319 static int 320 xnf_kstat_aux_update(kstat_t *ksp, int flag) 321 { 322 xnf_t *xnfp; 323 kstat_named_t *knp; 324 325 if (flag != KSTAT_READ) 326 return (EACCES); 327 328 xnfp = ksp->ks_private; 329 knp = ksp->ks_data; 330 331 /* 332 * Assignment order should match that of the names in 333 * xnf_aux_statistics. 334 */ 335 (knp++)->value.ui64 = xnfp->stat_tx_cksum_deferred; 336 (knp++)->value.ui64 = xnfp->stat_rx_cksum_no_need; 337 338 (knp++)->value.ui64 = xnfp->stat_intr; 339 (knp++)->value.ui64 = xnfp->stat_xmit_pullup; 340 (knp++)->value.ui64 = xnfp->stat_xmit_pagebndry; 341 (knp++)->value.ui64 = xnfp->stat_xmit_attempt; 342 (knp++)->value.ui64 = xnfp->stat_rx_no_ringbuf; 343 (knp++)->value.ui64 = xnfp->stat_mac_rcv_error; 344 (knp++)->value.ui64 = xnfp->stat_runt; 345 346 return (0); 347 } 348 349 static boolean_t 350 xnf_kstat_init(xnf_t *xnfp) 351 { 352 int nstat = sizeof (xnf_aux_statistics) / 353 sizeof (xnf_aux_statistics[0]); 354 char **cp = xnf_aux_statistics; 355 kstat_named_t *knp; 356 357 /* 358 * Create and initialise kstats. 
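	 *
	 * Once installed, these kstats can be read from userland with
	 * kstat(1M), e.g. something like:
	 *
	 *	# kstat -m xnf -n aux_statistics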
359 */ 360 if ((xnfp->kstat_aux = kstat_create("xnf", 361 ddi_get_instance(xnfp->devinfo), 362 "aux_statistics", "net", KSTAT_TYPE_NAMED, 363 nstat, 0)) == NULL) 364 return (B_FALSE); 365 366 xnfp->kstat_aux->ks_private = xnfp; 367 xnfp->kstat_aux->ks_update = xnf_kstat_aux_update; 368 369 knp = xnfp->kstat_aux->ks_data; 370 while (nstat > 0) { 371 kstat_named_init(knp, *cp, KSTAT_DATA_UINT64); 372 373 knp++; 374 cp++; 375 nstat--; 376 } 377 378 kstat_install(xnfp->kstat_aux); 379 380 return (B_TRUE); 381 } 382 383 static int 384 xnf_setup_rings(xnf_t *xnfp) 385 { 386 int ix, err; 387 RING_IDX i; 388 struct xnf_buffer_desc *bdesc, *rbp; 389 struct xenbus_device *xsd; 390 domid_t oeid; 391 392 oeid = xvdi_get_oeid(xnfp->devinfo); 393 xsd = xvdi_get_xsd(xnfp->devinfo); 394 395 if (xnfp->tx_ring_ref != GRANT_INVALID_REF) 396 gnttab_end_foreign_access(xnfp->tx_ring_ref, 0, 0); 397 398 err = gnttab_grant_foreign_access(oeid, 399 xnf_btop(pa_to_ma(xnfp->tx_ring_phys_addr)), 0); 400 if (err <= 0) { 401 err = -err; 402 xenbus_dev_error(xsd, err, "granting access to tx ring page"); 403 goto out; 404 } 405 xnfp->tx_ring_ref = (grant_ref_t)err; 406 407 if (xnfp->rx_ring_ref != GRANT_INVALID_REF) 408 gnttab_end_foreign_access(xnfp->rx_ring_ref, 0, 0); 409 410 err = gnttab_grant_foreign_access(oeid, 411 xnf_btop(pa_to_ma(xnfp->rx_ring_phys_addr)), 0); 412 if (err <= 0) { 413 err = -err; 414 xenbus_dev_error(xsd, err, "granting access to rx ring page"); 415 goto out; 416 } 417 xnfp->rx_ring_ref = (grant_ref_t)err; 418 419 420 mutex_enter(&xnfp->intrlock); 421 422 /* 423 * Cleanup the TX ring. We just clean up any valid tx_pktinfo structs 424 * and reset the ring. Note that this can lose packets after a resume, 425 * but we expect to stagger on. 426 */ 427 mutex_enter(&xnfp->txlock); 428 429 for (i = 0; i < xnfp->n_xmits; i++) { 430 struct tx_pktinfo *txp = &xnfp->tx_pkt_info[i]; 431 432 txp->id = i + 1; 433 434 if (txp->grant_ref == GRANT_INVALID_REF) { 435 ASSERT(txp->mp == NULL); 436 ASSERT(txp->bdesc == NULL); 437 continue; 438 } 439 440 if (gnttab_query_foreign_access(txp->grant_ref) != 0) 441 panic("tx grant still in use by backend domain"); 442 443 freemsg(txp->mp); 444 txp->mp = NULL; 445 446 (void) ddi_dma_unbind_handle(txp->dma_handle); 447 448 if (txp->bdesc != NULL) { 449 xnf_free_xmit_buffer(txp->bdesc); 450 txp->bdesc = NULL; 451 } 452 453 (void) gnttab_end_foreign_access_ref(txp->grant_ref, 454 xnfp->tx_pages_readonly); 455 gnttab_release_grant_reference(&xnfp->gref_tx_head, 456 txp->grant_ref); 457 txp->grant_ref = GRANT_INVALID_REF; 458 } 459 460 xnfp->tx_pkt_id_list = 0; 461 xnfp->tx_ring.rsp_cons = 0; 462 xnfp->tx_ring.sring->req_prod = 0; 463 xnfp->tx_ring.sring->rsp_prod = 0; 464 xnfp->tx_ring.sring->rsp_event = 1; 465 466 mutex_exit(&xnfp->txlock); 467 468 /* 469 * Rebuild the RX ring. We have to rebuild the RX ring because some of 470 * our pages are currently flipped out so we can't just free the RX 471 * buffers. Reclaim any unprocessed recv buffers, they won't be 472 * useable anyway since the mfn's they refer to are no longer valid. 473 * Grant the backend domain access to each hung rx buffer. 
474 */ 475 i = xnfp->rx_ring.rsp_cons; 476 while (i++ != xnfp->rx_ring.sring->req_prod) { 477 volatile netif_rx_request_t *rxrp; 478 479 rxrp = RING_GET_REQUEST(&xnfp->rx_ring, i); 480 ix = rxrp - RING_GET_REQUEST(&xnfp->rx_ring, 0); 481 rbp = xnfp->rxpkt_bufptr[ix]; 482 if (rbp != NULL) { 483 ASSERT(rbp->grant_ref != GRANT_INVALID_REF); 484 gnttab_grant_foreign_transfer_ref(rbp->grant_ref, 485 oeid); 486 rxrp->id = ix; 487 rxrp->gref = rbp->grant_ref; 488 } 489 } 490 /* 491 * Reset the ring pointers to initial state. 492 * Hang buffers for any empty ring slots. 493 */ 494 xnfp->rx_ring.rsp_cons = 0; 495 xnfp->rx_ring.sring->req_prod = 0; 496 xnfp->rx_ring.sring->rsp_prod = 0; 497 xnfp->rx_ring.sring->rsp_event = 1; 498 for (i = 0; i < NET_RX_RING_SIZE; i++) { 499 xnfp->rx_ring.req_prod_pvt = i; 500 if (xnfp->rxpkt_bufptr[i] != NULL) 501 continue; 502 if ((bdesc = xnf_get_buffer(xnfp)) == NULL) 503 break; 504 rx_buffer_hang(xnfp, bdesc); 505 } 506 xnfp->rx_ring.req_prod_pvt = i; 507 /* LINTED: constant in conditional context */ 508 RING_PUSH_REQUESTS(&xnfp->rx_ring); 509 510 mutex_exit(&xnfp->intrlock); 511 512 return (0); 513 514 out: 515 if (xnfp->tx_ring_ref != GRANT_INVALID_REF) 516 gnttab_end_foreign_access(xnfp->tx_ring_ref, 0, 0); 517 xnfp->tx_ring_ref = GRANT_INVALID_REF; 518 519 if (xnfp->rx_ring_ref != GRANT_INVALID_REF) 520 gnttab_end_foreign_access(xnfp->rx_ring_ref, 0, 0); 521 xnfp->rx_ring_ref = GRANT_INVALID_REF; 522 523 return (err); 524 } 525 526 /* 527 * Connect driver to back end, called to set up communication with 528 * back end driver both initially and on resume after restore/migrate. 529 */ 530 void 531 xnf_be_connect(xnf_t *xnfp) 532 { 533 char mac[ETHERADDRL * 3]; 534 const char *message; 535 xenbus_transaction_t xbt; 536 struct xenbus_device *xsd; 537 char *xsname; 538 int err, be_no_cksum_offload; 539 540 ASSERT(!xnfp->connected); 541 542 xsd = xvdi_get_xsd(xnfp->devinfo); 543 xsname = xvdi_get_xsname(xnfp->devinfo); 544 545 err = xenbus_scanf(XBT_NULL, xvdi_get_oename(xnfp->devinfo), "mac", 546 "%s", (char *)&mac[0]); 547 if (err != 0) { 548 /* 549 * bad: we're supposed to be set up with a proper mac 550 * addr. at this point 551 */ 552 cmn_err(CE_WARN, "%s%d: no mac address", 553 ddi_driver_name(xnfp->devinfo), 554 ddi_get_instance(xnfp->devinfo)); 555 return; 556 } 557 558 if (ether_aton(mac, xnfp->mac_addr) != ETHERADDRL) { 559 err = ENOENT; 560 xenbus_dev_error(xsd, ENOENT, "parsing %s/mac", xsname); 561 return; 562 } 563 564 err = xnf_setup_rings(xnfp); 565 if (err != 0) { 566 cmn_err(CE_WARN, "failed to set up tx/rx rings"); 567 xenbus_dev_error(xsd, err, "setting up ring"); 568 return; 569 } 570 571 err = xenbus_scanf(XBT_NULL, xvdi_get_oename(xnfp->devinfo), 572 "feature-no-csum-offload", "%d", &be_no_cksum_offload); 573 /* 574 * If we fail to read the store we assume that the key is 575 * absent, implying an older domain at the far end. Older 576 * domains always support checksum offload. 577 */ 578 if (err != 0) 579 be_no_cksum_offload = 0; 580 /* 581 * If the far end cannot do checksum offload or we do not wish 582 * to do it, disable it. 
583 */ 584 if ((be_no_cksum_offload == 1) || !xnfp->cksum_offload) 585 xnfp->cksum_offload = B_FALSE; 586 587 again: 588 err = xenbus_transaction_start(&xbt); 589 if (err != 0) { 590 xenbus_dev_error(xsd, EIO, "starting transaction"); 591 return; 592 } 593 594 err = xenbus_printf(xbt, xsname, "tx-ring-ref", "%u", 595 xnfp->tx_ring_ref); 596 if (err != 0) { 597 message = "writing tx ring-ref"; 598 goto abort_transaction; 599 } 600 601 err = xenbus_printf(xbt, xsname, "rx-ring-ref", "%u", 602 xnfp->rx_ring_ref); 603 if (err != 0) { 604 message = "writing rx ring-ref"; 605 goto abort_transaction; 606 } 607 608 err = xenbus_printf(xbt, xsname, "event-channel", "%u", xnfp->evtchn); 609 if (err != 0) { 610 message = "writing event-channel"; 611 goto abort_transaction; 612 } 613 614 err = xenbus_printf(xbt, xsname, "feature-rx-notify", "%d", 1); 615 if (err != 0) { 616 message = "writing feature-rx-notify"; 617 goto abort_transaction; 618 } 619 620 if (!xnfp->tx_pages_readonly) { 621 err = xenbus_printf(xbt, xsname, "feature-tx-writable", 622 "%d", 1); 623 if (err != 0) { 624 message = "writing feature-tx-writable"; 625 goto abort_transaction; 626 } 627 } 628 629 err = xenbus_printf(xbt, xsname, "feature-no-csum-offload", "%d", 630 xnfp->cksum_offload ? 0 : 1); 631 if (err != 0) { 632 message = "writing feature-no-csum-offload"; 633 goto abort_transaction; 634 } 635 636 err = xenbus_printf(xbt, xsname, "state", "%d", XenbusStateConnected); 637 if (err != 0) { 638 message = "writing frontend XenbusStateConnected"; 639 goto abort_transaction; 640 } 641 642 err = xenbus_transaction_end(xbt, 0); 643 if (err != 0) { 644 if (err == EAGAIN) 645 goto again; 646 xenbus_dev_error(xsd, err, "completing transaction"); 647 } 648 649 return; 650 651 abort_transaction: 652 (void) xenbus_transaction_end(xbt, 1); 653 xenbus_dev_error(xsd, err, "%s", message); 654 } 655 656 /* 657 * attach(9E) -- Attach a device to the system 658 * 659 * Called once for each board successfully probed. 660 */ 661 static int 662 xnf_attach(dev_info_t *devinfo, ddi_attach_cmd_t cmd) 663 { 664 mac_register_t *macp; 665 xnf_t *xnfp; 666 int err; 667 668 #ifdef XNF_DEBUG 669 if (xnfdebug & XNF_DEBUG_DDI) 670 printf("xnf%d: attach(0x%p)\n", ddi_get_instance(devinfo), 671 (void *)devinfo); 672 #endif 673 674 switch (cmd) { 675 case DDI_RESUME: 676 xnfp = ddi_get_driver_private(devinfo); 677 678 (void) xvdi_resume(devinfo); 679 (void) xvdi_alloc_evtchn(devinfo); 680 (void) ddi_add_intr(devinfo, 0, NULL, NULL, xnf_intr, 681 (caddr_t)xnfp); 682 xnfp->evtchn = xvdi_get_evtchn(devinfo); 683 xnf_be_connect(xnfp); 684 /* 685 * Our MAC address didn't necessarily change, but 686 * given that we may be resuming this OS instance 687 * on a different machine (or on the same one and got a 688 * different MAC address because we didn't specify one of 689 * our own), it's useful to claim that 690 * it changed in order that IP send out a 691 * gratuitous ARP. 
692 */ 693 mac_unicst_update(xnfp->mh, xnfp->mac_addr); 694 return (DDI_SUCCESS); 695 696 case DDI_ATTACH: 697 break; 698 699 default: 700 return (DDI_FAILURE); 701 } 702 703 /* 704 * Allocate gld_mac_info_t and xnf_instance structures 705 */ 706 macp = mac_alloc(MAC_VERSION); 707 if (macp == NULL) 708 return (DDI_FAILURE); 709 xnfp = kmem_zalloc(sizeof (*xnfp), KM_SLEEP); 710 711 macp->m_dip = devinfo; 712 macp->m_driver = xnfp; 713 xnfp->devinfo = devinfo; 714 715 macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER; 716 macp->m_src_addr = xnfp->mac_addr; 717 macp->m_callbacks = &xnf_callbacks; 718 macp->m_min_sdu = 0; 719 macp->m_max_sdu = XNF_MAXPKT; 720 721 xnfp->running = B_FALSE; 722 xnfp->connected = B_FALSE; 723 xnfp->cksum_offload = xnf_cksum_offload; 724 xnfp->tx_pages_readonly = xnf_tx_pages_readonly; 725 726 /* 727 * Get the iblock cookie with which to initialize the mutexes. 728 */ 729 if (ddi_get_iblock_cookie(devinfo, 0, &xnfp->icookie) 730 != DDI_SUCCESS) 731 goto failure; 732 /* 733 * Driver locking strategy: the txlock protects all paths 734 * through the driver, except the interrupt thread. 735 * If the interrupt thread needs to do something which could 736 * affect the operation of any other part of the driver, 737 * it needs to acquire the txlock mutex. 738 */ 739 mutex_init(&xnfp->tx_buf_mutex, 740 NULL, MUTEX_DRIVER, xnfp->icookie); 741 mutex_init(&xnfp->rx_buf_mutex, 742 NULL, MUTEX_DRIVER, xnfp->icookie); 743 mutex_init(&xnfp->txlock, 744 NULL, MUTEX_DRIVER, xnfp->icookie); 745 mutex_init(&xnfp->intrlock, 746 NULL, MUTEX_DRIVER, xnfp->icookie); 747 cv_init(&xnfp->cv, NULL, CV_DEFAULT, NULL); 748 749 if (gnttab_alloc_grant_references(NET_TX_RING_SIZE, 750 &xnfp->gref_tx_head) < 0) { 751 cmn_err(CE_WARN, "xnf%d: can't alloc tx grant refs", 752 ddi_get_instance(xnfp->devinfo)); 753 goto late_failure; 754 } 755 if (gnttab_alloc_grant_references(NET_RX_RING_SIZE, 756 &xnfp->gref_rx_head) < 0) { 757 cmn_err(CE_WARN, "xnf%d: can't alloc rx grant refs", 758 ddi_get_instance(xnfp->devinfo)); 759 goto late_failure; 760 } 761 if (xnf_alloc_dma_resources(xnfp) == DDI_FAILURE) { 762 cmn_err(CE_WARN, "xnf%d: failed to allocate and initialize " 763 "driver data structures", ddi_get_instance(xnfp->devinfo)); 764 goto late_failure; 765 } 766 767 xnfp->rx_ring.sring->rsp_event = xnfp->tx_ring.sring->rsp_event = 1; 768 769 xnfp->tx_ring_ref = GRANT_INVALID_REF; 770 xnfp->rx_ring_ref = GRANT_INVALID_REF; 771 772 /* set driver private pointer now */ 773 ddi_set_driver_private(devinfo, xnfp); 774 775 if (xvdi_add_event_handler(devinfo, XS_OE_STATE, oe_state_change) 776 != DDI_SUCCESS) 777 goto late_failure; 778 779 if (!xnf_kstat_init(xnfp)) 780 goto very_late_failure; 781 782 /* 783 * Allocate an event channel, add the interrupt handler and 784 * bind it to the event channel. 
785 */ 786 (void) xvdi_alloc_evtchn(devinfo); 787 (void) ddi_add_intr(devinfo, 0, NULL, NULL, xnf_intr, (caddr_t)xnfp); 788 xnfp->evtchn = xvdi_get_evtchn(devinfo); 789 790 /* 791 * connect to the backend 792 */ 793 xnf_be_connect(xnfp); 794 795 err = mac_register(macp, &xnfp->mh); 796 mac_free(macp); 797 macp = NULL; 798 if (err != 0) 799 goto very_very_late_failure; 800 801 return (DDI_SUCCESS); 802 803 very_very_late_failure: 804 kstat_delete(xnfp->kstat_aux); 805 806 very_late_failure: 807 xvdi_remove_event_handler(devinfo, XS_OE_STATE); 808 ddi_remove_intr(devinfo, 0, xnfp->icookie); 809 xnfp->evtchn = INVALID_EVTCHN; 810 811 late_failure: 812 xnf_release_dma_resources(xnfp); 813 cv_destroy(&xnfp->cv); 814 mutex_destroy(&xnfp->rx_buf_mutex); 815 mutex_destroy(&xnfp->txlock); 816 mutex_destroy(&xnfp->intrlock); 817 818 failure: 819 kmem_free(xnfp, sizeof (*xnfp)); 820 if (macp != NULL) 821 mac_free(macp); 822 823 (void) xvdi_switch_state(devinfo, XBT_NULL, XenbusStateClosed); 824 825 return (DDI_FAILURE); 826 } 827 828 /* detach(9E) -- Detach a device from the system */ 829 static int 830 xnf_detach(dev_info_t *devinfo, ddi_detach_cmd_t cmd) 831 { 832 xnf_t *xnfp; /* Our private device info */ 833 int i; 834 835 #ifdef XNF_DEBUG 836 if (xnfdebug & XNF_DEBUG_DDI) 837 printf("xnf_detach(0x%p)\n", (void *)devinfo); 838 #endif 839 840 xnfp = ddi_get_driver_private(devinfo); 841 842 switch (cmd) { 843 case DDI_SUSPEND: 844 ddi_remove_intr(devinfo, 0, xnfp->icookie); 845 846 xvdi_suspend(devinfo); 847 848 mutex_enter(&xnfp->intrlock); 849 mutex_enter(&xnfp->txlock); 850 851 xnfp->evtchn = INVALID_EVTCHN; 852 xnfp->connected = B_FALSE; 853 mutex_exit(&xnfp->txlock); 854 mutex_exit(&xnfp->intrlock); 855 return (DDI_SUCCESS); 856 857 case DDI_DETACH: 858 break; 859 860 default: 861 return (DDI_FAILURE); 862 } 863 864 if (xnfp->connected) 865 return (DDI_FAILURE); 866 867 /* Wait for receive buffers to be returned; give up after 5 seconds */ 868 i = 50; 869 870 mutex_enter(&xnfp->rx_buf_mutex); 871 while (xnfp->rx_bufs_outstanding > 0) { 872 mutex_exit(&xnfp->rx_buf_mutex); 873 delay(drv_usectohz(100000)); 874 if (--i == 0) { 875 cmn_err(CE_WARN, 876 "xnf%d: never reclaimed all the " 877 "receive buffers. Still have %d " 878 "buffers outstanding.", 879 ddi_get_instance(xnfp->devinfo), 880 xnfp->rx_bufs_outstanding); 881 return (DDI_FAILURE); 882 } 883 mutex_enter(&xnfp->rx_buf_mutex); 884 } 885 mutex_exit(&xnfp->rx_buf_mutex); 886 887 kstat_delete(xnfp->kstat_aux); 888 889 if (mac_unregister(xnfp->mh) != 0) 890 return (DDI_FAILURE); 891 892 /* Stop the receiver */ 893 xnf_stop(xnfp); 894 895 xvdi_remove_event_handler(devinfo, XS_OE_STATE); 896 897 /* Remove the interrupt */ 898 ddi_remove_intr(devinfo, 0, xnfp->icookie); 899 900 /* Release any pending xmit mblks */ 901 xnf_release_mblks(xnfp); 902 903 /* Release all DMA resources */ 904 xnf_release_dma_resources(xnfp); 905 906 cv_destroy(&xnfp->cv); 907 mutex_destroy(&xnfp->rx_buf_mutex); 908 mutex_destroy(&xnfp->txlock); 909 mutex_destroy(&xnfp->intrlock); 910 911 kmem_free(xnfp, sizeof (*xnfp)); 912 913 return (DDI_SUCCESS); 914 } 915 916 /* 917 * xnf_set_mac_addr() -- set the physical network address on the board. 
918 */ 919 /*ARGSUSED*/ 920 static int 921 xnf_set_mac_addr(void *arg, const uint8_t *macaddr) 922 { 923 xnf_t *xnfp = arg; 924 925 #ifdef XNF_DEBUG 926 if (xnfdebug & XNF_DEBUG_TRACE) 927 printf("xnf%d: set_mac_addr(0x%p): " 928 "%02x:%02x:%02x:%02x:%02x:%02x\n", 929 ddi_get_instance(xnfp->devinfo), 930 (void *)xnfp, macaddr[0], macaddr[1], macaddr[2], 931 macaddr[3], macaddr[4], macaddr[5]); 932 #endif 933 /* 934 * We can't set our macaddr. 935 * 936 * XXPV dme: Why not? 937 */ 938 return (ENOTSUP); 939 } 940 941 /* 942 * xnf_set_multicast() -- set (enable) or disable a multicast address. 943 * 944 * Program the hardware to enable/disable the multicast address 945 * in "mcast". Enable if "add" is true, disable if false. 946 */ 947 /*ARGSUSED*/ 948 static int 949 xnf_set_multicast(void *arg, boolean_t add, const uint8_t *mca) 950 { 951 xnf_t *xnfp = arg; 952 953 #ifdef XNF_DEBUG 954 if (xnfdebug & XNF_DEBUG_TRACE) 955 printf("xnf%d set_multicast(0x%p): " 956 "%02x:%02x:%02x:%02x:%02x:%02x\n", 957 ddi_get_instance(xnfp->devinfo), 958 (void *)xnfp, mca[0], mca[1], mca[2], 959 mca[3], mca[4], mca[5]); 960 #endif 961 962 /* 963 * XXPV dme: Ideally we'd relay the address to the backend for 964 * enabling. The protocol doesn't support that (interesting 965 * extension), so we simply succeed and hope that the relevant 966 * packets are going to arrive. 967 * 968 * If protocol support is added for enable/disable then we'll 969 * need to keep a list of those in use and re-add on resume. 970 */ 971 return (0); 972 } 973 974 /* 975 * xnf_set_promiscuous() -- set or reset promiscuous mode on the board 976 * 977 * Program the hardware to enable/disable promiscuous mode. 978 */ 979 /*ARGSUSED*/ 980 static int 981 xnf_set_promiscuous(void *arg, boolean_t on) 982 { 983 xnf_t *xnfp = arg; 984 985 #ifdef XNF_DEBUG 986 if (xnfdebug & XNF_DEBUG_TRACE) 987 printf("xnf%d set_promiscuous(0x%p, %x)\n", 988 ddi_get_instance(xnfp->devinfo), 989 (void *)xnfp, on); 990 #endif 991 /* 992 * We can't really do this, but we pretend that we can in 993 * order that snoop will work. 994 */ 995 return (0); 996 } 997 998 /* 999 * Clean buffers that we have responses for from the transmit ring. 
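 *
 * Returns the number of free slots remaining in the shared ring,
 * i.e. NET_TX_RING_SIZE - (req_prod - rsp_cons) once rsp_cons has
 * caught up.  As a worked example (assuming a 256-slot ring): with
 * req_prod == 10 and rsp_cons == 4, six requests are still
 * outstanding and 250 slots are free.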
 */
static int
xnf_clean_tx_ring(xnf_t *xnfp)
{
	RING_IDX		next_resp, i;
	struct tx_pktinfo	*reap;
	int			id;
	grant_ref_t		ref;

	ASSERT(MUTEX_HELD(&xnfp->txlock));

	do {
		/*
		 * index of next transmission ack
		 */
		next_resp = xnfp->tx_ring.sring->rsp_prod;
		membar_consumer();
		/*
		 * Clean tx packets from ring that we have responses for
		 */
		for (i = xnfp->tx_ring.rsp_cons; i != next_resp; i++) {
			id = RING_GET_RESPONSE(&xnfp->tx_ring, i)->id;
			reap = &xnfp->tx_pkt_info[id];
			ref = reap->grant_ref;
			/*
			 * Return id to free list
			 */
			reap->id = xnfp->tx_pkt_id_list;
			xnfp->tx_pkt_id_list = id;
			if (gnttab_query_foreign_access(ref) != 0)
				panic("tx grant still in use "
				    "by backend domain");
			(void) ddi_dma_unbind_handle(reap->dma_handle);
			(void) gnttab_end_foreign_access_ref(ref,
			    xnfp->tx_pages_readonly);
			gnttab_release_grant_reference(&xnfp->gref_tx_head,
			    ref);
			freemsg(reap->mp);
			reap->mp = NULL;
			reap->grant_ref = GRANT_INVALID_REF;
			if (reap->bdesc != NULL)
				xnf_free_xmit_buffer(reap->bdesc);
			reap->bdesc = NULL;
		}
		xnfp->tx_ring.rsp_cons = next_resp;
		membar_enter();
	} while (next_resp != xnfp->tx_ring.sring->rsp_prod);
	return (NET_TX_RING_SIZE - (xnfp->tx_ring.sring->req_prod - next_resp));
}

/*
 * If we need to pull up data from a packet that either crosses a page
 * boundary or consists of multiple mblks, do it here.  We allocate a
 * page-aligned buffer and copy the data into it.  The buffer descriptor
 * (which is also allocated here) is returned.
 */
static struct xnf_buffer_desc *
xnf_pullupmsg(xnf_t *xnfp, mblk_t *mp)
{
	struct xnf_buffer_desc	*bdesc;
	mblk_t			*mptr;
	caddr_t			bp;
	int			len;

	/*
	 * get a xmit buffer from the xmit buffer pool
	 */
	mutex_enter(&xnfp->rx_buf_mutex);
	bdesc = xnf_get_xmit_buffer(xnfp);
	mutex_exit(&xnfp->rx_buf_mutex);
	if (bdesc == NULL)
		return (bdesc);
	/*
	 * Copy the data into the buffer
	 */
	xnfp->stat_xmit_pullup++;
	bp = bdesc->buf;
	for (mptr = mp; mptr != NULL; mptr = mptr->b_cont) {
		len = mptr->b_wptr - mptr->b_rptr;
		bcopy(mptr->b_rptr, bp, len);
		bp += len;
	}
	return (bdesc);
}

/*
 * xnf_send_one() -- send a packet
 *
 * Called when a packet is ready to be transmitted.  A pointer to an
 * M_DATA message that contains the packet is passed to this routine.
 * At least the complete LLC header is contained in the message's
 * first message block, and the remainder of the packet is contained
 * within additional M_DATA message blocks linked to the first
 * message block.
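 *
 * In outline (a simplified sketch of the code below, using the
 * driver's own names; see the function body for the real thing):
 *
 *	slot = tx_ring.sring->req_prod;
 *	ref = gnttab_claim_grant_reference(&gref_tx_head);
 *	gnttab_grant_foreign_access_ref(ref, oeid, mfn, readonly);
 *	txrp = RING_GET_REQUEST(&tx_ring, slot);
 *	txrp->gref = ref; txrp->id = tx_id; txrp->size = length;
 *	membar_producer();
 *	tx_ring.sring->req_prod = slot + 1;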
1094 * 1095 */ 1096 static boolean_t 1097 xnf_send_one(xnf_t *xnfp, mblk_t *mp) 1098 { 1099 struct xnf_buffer_desc *xmitbuf; 1100 struct tx_pktinfo *txp_info; 1101 mblk_t *mptr; 1102 ddi_dma_cookie_t dma_cookie; 1103 RING_IDX slot, txs_out; 1104 int length = 0, i, pktlen = 0, rc, tx_id; 1105 int tx_ring_freespace, page_oops; 1106 uint_t ncookies; 1107 volatile netif_tx_request_t *txrp; 1108 caddr_t bufaddr; 1109 grant_ref_t ref; 1110 unsigned long mfn; 1111 uint32_t pflags; 1112 domid_t oeid; 1113 1114 #ifdef XNF_DEBUG 1115 if (xnfdebug & XNF_DEBUG_SEND) 1116 printf("xnf%d send(0x%p, 0x%p)\n", 1117 ddi_get_instance(xnfp->devinfo), 1118 (void *)xnfp, (void *)mp); 1119 #endif 1120 1121 ASSERT(mp != NULL); 1122 ASSERT(mp->b_next == NULL); 1123 ASSERT(MUTEX_HELD(&xnfp->txlock)); 1124 1125 tx_ring_freespace = xnf_clean_tx_ring(xnfp); 1126 ASSERT(tx_ring_freespace >= 0); 1127 1128 oeid = xvdi_get_oeid(xnfp->devinfo); 1129 xnfp->stat_xmit_attempt++; 1130 /* 1131 * If there are no xmit ring slots available, return. 1132 */ 1133 if (tx_ring_freespace == 0) { 1134 xnfp->stat_xmit_defer++; 1135 return (B_FALSE); /* Send should be retried */ 1136 } 1137 1138 slot = xnfp->tx_ring.sring->req_prod; 1139 /* Count the number of mblks in message and compute packet size */ 1140 for (i = 0, mptr = mp; mptr != NULL; mptr = mptr->b_cont, i++) 1141 pktlen += (mptr->b_wptr - mptr->b_rptr); 1142 1143 /* Make sure packet isn't too large */ 1144 if (pktlen > XNF_FRAMESIZE) { 1145 cmn_err(CE_WARN, "xnf%d: large packet %d bytes", 1146 ddi_get_instance(xnfp->devinfo), pktlen); 1147 freemsg(mp); 1148 return (B_FALSE); 1149 } 1150 1151 /* 1152 * Test if we cross a page boundary with our buffer 1153 */ 1154 page_oops = (i == 1) && 1155 (xnf_btop((size_t)mp->b_rptr) != 1156 xnf_btop((size_t)(mp->b_rptr + pktlen))); 1157 /* 1158 * XXPV - unfortunately, the Xen virtual net device currently 1159 * doesn't support multiple packet frags, so this will always 1160 * end up doing the pullup if we got more than one packet. 1161 */ 1162 if (i > xnf_max_tx_frags || page_oops) { 1163 if (page_oops) 1164 xnfp->stat_xmit_pagebndry++; 1165 if ((xmitbuf = xnf_pullupmsg(xnfp, mp)) == NULL) { 1166 /* could not allocate resources? 
*/ 1167 #ifdef XNF_DEBUG 1168 cmn_err(CE_WARN, "xnf%d: pullupmsg failed", 1169 ddi_get_instance(xnfp->devinfo)); 1170 #endif 1171 xnfp->stat_xmit_defer++; 1172 return (B_FALSE); /* Retry send */ 1173 } 1174 bufaddr = xmitbuf->buf; 1175 } else { 1176 xmitbuf = NULL; 1177 bufaddr = (caddr_t)mp->b_rptr; 1178 } 1179 1180 /* set up data descriptor */ 1181 length = pktlen; 1182 1183 /* 1184 * Get packet id from free list 1185 */ 1186 tx_id = xnfp->tx_pkt_id_list; 1187 ASSERT(tx_id < NET_TX_RING_SIZE); 1188 txp_info = &xnfp->tx_pkt_info[tx_id]; 1189 xnfp->tx_pkt_id_list = txp_info->id; 1190 txp_info->id = tx_id; 1191 1192 /* Prepare for DMA mapping of tx buffer(s) */ 1193 rc = ddi_dma_addr_bind_handle(txp_info->dma_handle, 1194 NULL, bufaddr, length, DDI_DMA_WRITE | DDI_DMA_STREAMING, 1195 DDI_DMA_DONTWAIT, 0, &dma_cookie, &ncookies); 1196 if (rc != DDI_DMA_MAPPED) { 1197 ASSERT(rc != DDI_DMA_INUSE); 1198 ASSERT(rc != DDI_DMA_PARTIAL_MAP); 1199 /* 1200 * Return id to free list 1201 */ 1202 txp_info->id = xnfp->tx_pkt_id_list; 1203 xnfp->tx_pkt_id_list = tx_id; 1204 if (rc == DDI_DMA_NORESOURCES) { 1205 xnfp->stat_xmit_defer++; 1206 return (B_FALSE); /* Retry later */ 1207 } 1208 #ifdef XNF_DEBUG 1209 cmn_err(CE_WARN, "xnf%d: bind_handle failed (%x)", 1210 ddi_get_instance(xnfp->devinfo), rc); 1211 #endif 1212 return (B_FALSE); 1213 } 1214 1215 ASSERT(ncookies == 1); 1216 ref = gnttab_claim_grant_reference(&xnfp->gref_tx_head); 1217 ASSERT((signed short)ref >= 0); 1218 mfn = xnf_btop(pa_to_ma((paddr_t)dma_cookie.dmac_laddress)); 1219 gnttab_grant_foreign_access_ref(ref, oeid, mfn, 1220 xnfp->tx_pages_readonly); 1221 txp_info->grant_ref = ref; 1222 txrp = RING_GET_REQUEST(&xnfp->tx_ring, slot); 1223 txrp->gref = ref; 1224 txrp->size = dma_cookie.dmac_size; 1225 txrp->offset = (uintptr_t)bufaddr & PAGEOFFSET; 1226 txrp->id = tx_id; 1227 txrp->flags = 0; 1228 hcksum_retrieve(mp, NULL, NULL, NULL, NULL, NULL, NULL, &pflags); 1229 if (pflags != 0) { 1230 ASSERT(xnfp->cksum_offload); 1231 /* 1232 * If the local protocol stack requests checksum 1233 * offload we set the 'checksum blank' flag, 1234 * indicating to the peer that we need the checksum 1235 * calculated for us. 1236 * 1237 * We _don't_ set the validated flag, because we haven't 1238 * validated that the data and the checksum match. 1239 */ 1240 txrp->flags |= NETTXF_csum_blank; 1241 xnfp->stat_tx_cksum_deferred++; 1242 } 1243 membar_producer(); 1244 xnfp->tx_ring.sring->req_prod = slot + 1; 1245 1246 txp_info->mp = mp; 1247 txp_info->bdesc = xmitbuf; 1248 1249 txs_out = xnfp->tx_ring.sring->req_prod - xnfp->tx_ring.sring->rsp_prod; 1250 if (xnfp->tx_ring.sring->req_prod - xnfp->tx_ring.rsp_cons < 1251 XNF_TX_FREE_THRESH) { 1252 /* 1253 * The ring is getting full; Set up this packet 1254 * to cause an interrupt. 1255 */ 1256 xnfp->tx_ring.sring->rsp_event = 1257 xnfp->tx_ring.sring->rsp_prod + txs_out; 1258 } 1259 1260 xnfp->stat_opackets++; 1261 xnfp->stat_obytes += pktlen; 1262 1263 return (B_TRUE); /* successful transmit attempt */ 1264 } 1265 1266 mblk_t * 1267 xnf_send(void *arg, mblk_t *mp) 1268 { 1269 xnf_t *xnfp = arg; 1270 mblk_t *next; 1271 boolean_t sent_something = B_FALSE; 1272 1273 mutex_enter(&xnfp->txlock); 1274 1275 /* 1276 * Transmission attempts should be impossible without having 1277 * previously called xnf_start(). 
1278 */ 1279 ASSERT(xnfp->running); 1280 1281 /* 1282 * Wait for getting connected to the backend 1283 */ 1284 while (!xnfp->connected) { 1285 cv_wait(&xnfp->cv, &xnfp->txlock); 1286 } 1287 1288 while (mp != NULL) { 1289 next = mp->b_next; 1290 mp->b_next = NULL; 1291 1292 if (!xnf_send_one(xnfp, mp)) { 1293 mp->b_next = next; 1294 break; 1295 } 1296 1297 mp = next; 1298 sent_something = B_TRUE; 1299 } 1300 1301 if (sent_something) 1302 ec_notify_via_evtchn(xnfp->evtchn); 1303 1304 mutex_exit(&xnfp->txlock); 1305 1306 return (mp); 1307 } 1308 1309 /* 1310 * xnf_intr() -- ring interrupt service routine 1311 */ 1312 static uint_t 1313 xnf_intr(caddr_t arg) 1314 { 1315 xnf_t *xnfp = (xnf_t *)arg; 1316 int tx_ring_space; 1317 1318 mutex_enter(&xnfp->intrlock); 1319 1320 /* 1321 * If not connected to the peer or not started by the upper 1322 * layers we cannot usefully handle interrupts. 1323 */ 1324 if (!(xnfp->connected && xnfp->running)) { 1325 mutex_exit(&xnfp->intrlock); 1326 return (DDI_INTR_UNCLAIMED); 1327 } 1328 1329 #ifdef XNF_DEBUG 1330 if (xnfdebug & XNF_DEBUG_INT) 1331 printf("xnf%d intr(0x%p)\n", 1332 ddi_get_instance(xnfp->devinfo), (void *)xnfp); 1333 #endif 1334 if (RING_HAS_UNCONSUMED_RESPONSES(&xnfp->rx_ring)) { 1335 mblk_t *mp; 1336 1337 if ((mp = xnf_process_recv(xnfp)) != NULL) 1338 mac_rx(xnfp->mh, xnfp->rx_handle, mp); 1339 } 1340 1341 /* 1342 * Is tx ring nearly full? 1343 */ 1344 #define inuse(r) ((r).sring->req_prod - (r).rsp_cons) 1345 1346 if ((NET_TX_RING_SIZE - inuse(xnfp->tx_ring)) < XNF_TX_FREE_THRESH) { 1347 /* 1348 * Yes, clean it and try to start any blocked xmit 1349 * streams. 1350 */ 1351 mutex_enter(&xnfp->txlock); 1352 tx_ring_space = xnf_clean_tx_ring(xnfp); 1353 mutex_exit(&xnfp->txlock); 1354 if (tx_ring_space > XNF_TX_FREE_THRESH) { 1355 mutex_exit(&xnfp->intrlock); 1356 mac_tx_update(xnfp->mh); 1357 mutex_enter(&xnfp->intrlock); 1358 } else { 1359 /* 1360 * Schedule another tx interrupt when we have 1361 * sent enough packets to cross the threshold. 1362 */ 1363 xnfp->tx_ring.sring->rsp_event = 1364 xnfp->tx_ring.sring->rsp_prod + 1365 XNF_TX_FREE_THRESH - tx_ring_space + 1; 1366 } 1367 } 1368 #undef inuse 1369 1370 xnfp->stat_intr++; 1371 mutex_exit(&xnfp->intrlock); 1372 return (DDI_INTR_CLAIMED); /* indicate that the interrupt was for us */ 1373 } 1374 1375 /* 1376 * xnf_start() -- start the board receiving and enable interrupts. 1377 */ 1378 static int 1379 xnf_start(void *arg) 1380 { 1381 xnf_t *xnfp = arg; 1382 1383 #ifdef XNF_DEBUG 1384 if (xnfdebug & XNF_DEBUG_TRACE) 1385 printf("xnf%d start(0x%p)\n", 1386 ddi_get_instance(xnfp->devinfo), (void *)xnfp); 1387 #endif 1388 1389 mutex_enter(&xnfp->intrlock); 1390 mutex_enter(&xnfp->txlock); 1391 1392 /* Accept packets from above. 
*/ 1393 xnfp->running = B_TRUE; 1394 1395 mutex_exit(&xnfp->txlock); 1396 mutex_exit(&xnfp->intrlock); 1397 1398 return (0); 1399 } 1400 1401 /* xnf_stop() - disable hardware */ 1402 static void 1403 xnf_stop(void *arg) 1404 { 1405 xnf_t *xnfp = arg; 1406 1407 #ifdef XNF_DEBUG 1408 if (xnfdebug & XNF_DEBUG_TRACE) 1409 printf("xnf%d stop(0x%p)\n", 1410 ddi_get_instance(xnfp->devinfo), (void *)xnfp); 1411 #endif 1412 1413 mutex_enter(&xnfp->intrlock); 1414 mutex_enter(&xnfp->txlock); 1415 1416 xnfp->running = B_FALSE; 1417 1418 mutex_exit(&xnfp->txlock); 1419 mutex_exit(&xnfp->intrlock); 1420 } 1421 1422 /* 1423 * Driver private functions follow 1424 */ 1425 1426 /* 1427 * Hang buffer on rx ring 1428 */ 1429 static void 1430 rx_buffer_hang(xnf_t *xnfp, struct xnf_buffer_desc *bdesc) 1431 { 1432 volatile netif_rx_request_t *reqp; 1433 RING_IDX hang_ix; 1434 grant_ref_t ref; 1435 domid_t oeid; 1436 1437 oeid = xvdi_get_oeid(xnfp->devinfo); 1438 1439 ASSERT(MUTEX_HELD(&xnfp->intrlock)); 1440 reqp = RING_GET_REQUEST(&xnfp->rx_ring, xnfp->rx_ring.req_prod_pvt); 1441 hang_ix = (RING_IDX) (reqp - RING_GET_REQUEST(&xnfp->rx_ring, 0)); 1442 ASSERT(xnfp->rxpkt_bufptr[hang_ix] == NULL); 1443 if (bdesc->grant_ref == GRANT_INVALID_REF) { 1444 ref = gnttab_claim_grant_reference(&xnfp->gref_rx_head); 1445 ASSERT((signed short)ref >= 0); 1446 bdesc->grant_ref = ref; 1447 gnttab_grant_foreign_transfer_ref(ref, oeid); 1448 } 1449 reqp->id = hang_ix; 1450 reqp->gref = bdesc->grant_ref; 1451 bdesc->id = hang_ix; 1452 xnfp->rxpkt_bufptr[hang_ix] = bdesc; 1453 membar_producer(); 1454 xnfp->rx_ring.req_prod_pvt++; 1455 } 1456 1457 1458 /* Process all queued received packets */ 1459 static mblk_t * 1460 xnf_process_recv(xnf_t *xnfp) 1461 { 1462 volatile netif_rx_response_t *rxpkt; 1463 mblk_t *mp, *head, *tail; 1464 struct xnf_buffer_desc *bdesc; 1465 extern mblk_t *desballoc(unsigned char *, size_t, uint_t, frtn_t *); 1466 boolean_t hwcsum = B_FALSE, notify, work_to_do; 1467 size_t len; 1468 pfn_t pfn; 1469 long cnt; 1470 1471 head = tail = NULL; 1472 loop: 1473 while (RING_HAS_UNCONSUMED_RESPONSES(&xnfp->rx_ring)) { 1474 1475 rxpkt = RING_GET_RESPONSE(&xnfp->rx_ring, 1476 xnfp->rx_ring.rsp_cons); 1477 1478 /* 1479 * Take buffer off of receive ring 1480 */ 1481 hwcsum = B_FALSE; 1482 bdesc = xnfp->rxpkt_bufptr[rxpkt->id]; 1483 xnfp->rxpkt_bufptr[rxpkt->id] = NULL; 1484 ASSERT(bdesc->id == rxpkt->id); 1485 if (rxpkt->status <= 0) { 1486 mp = NULL; 1487 xnfp->stat_errrcv++; 1488 if (rxpkt->status == 0) 1489 xnfp->stat_runt++; 1490 if (rxpkt->status == NETIF_RSP_ERROR) 1491 xnfp->stat_mac_rcv_error++; 1492 if (rxpkt->status == NETIF_RSP_DROPPED) 1493 xnfp->stat_norcvbuf++; 1494 /* 1495 * re-hang the buffer 1496 */ 1497 rx_buffer_hang(xnfp, bdesc); 1498 } else { 1499 grant_ref_t ref = bdesc->grant_ref; 1500 struct xnf_buffer_desc *new_bdesc; 1501 unsigned long off = rxpkt->offset; 1502 unsigned long mfn; 1503 1504 len = rxpkt->status; 1505 ASSERT(off + len <= PAGEOFFSET); 1506 if (ref == GRANT_INVALID_REF) { 1507 mp = NULL; 1508 new_bdesc = bdesc; 1509 cmn_err(CE_WARN, "Bad rx grant reference %d " 1510 "from dom %d", ref, 1511 xvdi_get_oeid(xnfp->devinfo)); 1512 goto luckless; 1513 } 1514 bdesc->grant_ref = GRANT_INVALID_REF; 1515 mfn = gnttab_end_foreign_transfer_ref(ref); 1516 ASSERT(mfn != MFN_INVALID); 1517 ASSERT(hat_getpfnum(kas.a_hat, bdesc->buf) == 1518 PFN_INVALID); 1519 gnttab_release_grant_reference(&xnfp->gref_rx_head, 1520 ref); 1521 reassign_pfn(xnf_btop(bdesc->buf_phys), mfn); 1522 
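			/*
			 * The backend has transferred ownership of a new
			 * machine frame (mfn) to us; reassign_pfn() above
			 * plumbed that frame in under this buffer's pfn.
			 * Re-establish the kernel mapping for the buffer
			 * and account for the page gained by the domain.
			 */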
hat_devload(kas.a_hat, bdesc->buf, PAGESIZE, 1523 xnf_btop(bdesc->buf_phys), 1524 PROT_READ | PROT_WRITE, HAT_LOAD); 1525 balloon_drv_added(1); 1526 if (rxpkt->flags & NETRXF_data_validated) 1527 hwcsum = B_TRUE; 1528 if (len <= xnf_rx_bcopy_thresh) { 1529 /* 1530 * For small buffers, just copy the data 1531 * and send the copy upstream. 1532 */ 1533 new_bdesc = NULL; 1534 } else { 1535 /* 1536 * We send a pointer to this data upstream; 1537 * we need a new buffer to replace this one. 1538 */ 1539 mutex_enter(&xnfp->rx_buf_mutex); 1540 new_bdesc = xnf_get_buffer(xnfp); 1541 if (new_bdesc != NULL) { 1542 xnfp->rx_bufs_outstanding++; 1543 } else { 1544 xnfp->stat_rx_no_ringbuf++; 1545 } 1546 mutex_exit(&xnfp->rx_buf_mutex); 1547 } 1548 1549 if (new_bdesc == NULL) { 1550 /* 1551 * Don't have a new ring buffer; bcopy the data 1552 * from the buffer, and preserve the 1553 * original buffer 1554 */ 1555 if ((mp = allocb(len, BPRI_MED)) == NULL) { 1556 /* 1557 * Could't get buffer to copy to, 1558 * drop this data, and re-hang 1559 * the buffer on the ring. 1560 */ 1561 xnfp->stat_norcvbuf++; 1562 } else { 1563 bcopy(bdesc->buf + off, mp->b_wptr, 1564 len); 1565 } 1566 /* 1567 * Give the buffer page back to xen 1568 */ 1569 pfn = xnf_btop(bdesc->buf_phys); 1570 cnt = balloon_free_pages(1, &mfn, bdesc->buf, 1571 &pfn); 1572 if (cnt != 1) { 1573 cmn_err(CE_WARN, "unable to give a " 1574 "page back to the hypervisor\n"); 1575 } 1576 new_bdesc = bdesc; 1577 } else { 1578 if ((mp = desballoc((unsigned char *)bdesc->buf, 1579 off + len, 0, (frtn_t *)bdesc)) == NULL) { 1580 /* 1581 * Couldn't get mblk to pass recv data 1582 * up with, free the old ring buffer 1583 */ 1584 xnfp->stat_norcvbuf++; 1585 xnf_rcv_complete(bdesc); 1586 goto luckless; 1587 } 1588 (void) ddi_dma_sync(bdesc->dma_handle, 1589 0, 0, DDI_DMA_SYNC_FORCPU); 1590 1591 mp->b_wptr += off; 1592 mp->b_rptr += off; 1593 } 1594 luckless: 1595 if (mp) 1596 mp->b_wptr += len; 1597 /* re-hang old or hang new buffer */ 1598 rx_buffer_hang(xnfp, new_bdesc); 1599 } 1600 if (mp) { 1601 if (hwcsum) { 1602 /* 1603 * If the peer says that the data has 1604 * been validated then we declare that 1605 * the full checksum has been 1606 * verified. 1607 * 1608 * We don't look at the "checksum 1609 * blank" flag, and hence could have a 1610 * packet here that we are asserting 1611 * is good with a blank checksum. 1612 * 1613 * The hardware checksum offload 1614 * specification says that we must 1615 * provide the actual checksum as well 1616 * as an assertion that it is valid, 1617 * but the protocol stack doesn't 1618 * actually use it and some other 1619 * drivers don't bother, so we don't. 1620 * If it was necessary we could grovel 1621 * in the packet to find it. 1622 */ 1623 1624 (void) hcksum_assoc(mp, NULL, 1625 NULL, 0, 0, 0, 0, 1626 HCK_FULLCKSUM | 1627 HCK_FULLCKSUM_OK, 1628 0); 1629 xnfp->stat_rx_cksum_no_need++; 1630 } 1631 if (head == NULL) { 1632 head = tail = mp; 1633 } else { 1634 tail->b_next = mp; 1635 tail = mp; 1636 } 1637 1638 ASSERT(mp->b_next == NULL); 1639 1640 xnfp->stat_ipackets++; 1641 xnfp->stat_rbytes += len; 1642 } 1643 1644 xnfp->rx_ring.rsp_cons++; 1645 } 1646 1647 /* 1648 * Has more data come in since we started? 1649 */ 1650 /* LINTED: constant in conditional context */ 1651 RING_FINAL_CHECK_FOR_RESPONSES(&xnfp->rx_ring, work_to_do); 1652 if (work_to_do) 1653 goto loop; 1654 1655 /* 1656 * Indicate to the backend that we have re-filled the receive 1657 * ring. 
1658 */ 1659 /* LINTED: constant in conditional context */ 1660 RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&xnfp->rx_ring, notify); 1661 if (notify) 1662 ec_notify_via_evtchn(xnfp->evtchn); 1663 1664 return (head); 1665 } 1666 1667 /* Called when the upper layers free a message we passed upstream */ 1668 static void 1669 xnf_rcv_complete(struct xnf_buffer_desc *bdesc) 1670 { 1671 xnf_t *xnfp = bdesc->xnfp; 1672 pfn_t pfn; 1673 long cnt; 1674 1675 /* One less outstanding receive buffer */ 1676 mutex_enter(&xnfp->rx_buf_mutex); 1677 --xnfp->rx_bufs_outstanding; 1678 /* 1679 * Return buffer to the free list, unless the free list is getting 1680 * too large. XXX - this threshold may need tuning. 1681 */ 1682 if (xnfp->rx_descs_free < xnf_recv_bufs_lowat) { 1683 /* 1684 * Unmap the page, and hand the machine page back 1685 * to xen so it can be re-used as a backend net buffer. 1686 */ 1687 pfn = xnf_btop(bdesc->buf_phys); 1688 cnt = balloon_free_pages(1, NULL, bdesc->buf, &pfn); 1689 if (cnt != 1) { 1690 cmn_err(CE_WARN, "unable to give a page back to the " 1691 "hypervisor\n"); 1692 } 1693 1694 bdesc->next = xnfp->free_list; 1695 xnfp->free_list = bdesc; 1696 xnfp->rx_descs_free++; 1697 mutex_exit(&xnfp->rx_buf_mutex); 1698 } else { 1699 /* 1700 * We can return everything here since we have a free buffer 1701 * that we have not given the backing page for back to xen. 1702 */ 1703 --xnfp->recv_buffer_count; 1704 mutex_exit(&xnfp->rx_buf_mutex); 1705 (void) ddi_dma_unbind_handle(bdesc->dma_handle); 1706 ddi_dma_mem_free(&bdesc->acc_handle); 1707 ddi_dma_free_handle(&bdesc->dma_handle); 1708 kmem_free(bdesc, sizeof (*bdesc)); 1709 } 1710 } 1711 1712 /* 1713 * xnf_alloc_dma_resources() -- initialize the drivers structures 1714 */ 1715 static int 1716 xnf_alloc_dma_resources(xnf_t *xnfp) 1717 { 1718 dev_info_t *devinfo = xnfp->devinfo; 1719 int i; 1720 size_t len; 1721 ddi_dma_cookie_t dma_cookie; 1722 uint_t ncookies; 1723 struct xnf_buffer_desc *bdesc; 1724 int rc; 1725 caddr_t rptr; 1726 1727 xnfp->n_recvs = NET_RX_RING_SIZE; 1728 xnfp->max_recv_bufs = xnf_recv_bufs_hiwat; 1729 1730 xnfp->n_xmits = NET_TX_RING_SIZE; 1731 1732 /* 1733 * The code below allocates all the DMA data structures that 1734 * need to be released when the driver is detached. 1735 * 1736 * First allocate handles for mapping (virtual address) pointers to 1737 * transmit data buffers to physical addresses 1738 */ 1739 for (i = 0; i < xnfp->n_xmits; i++) { 1740 if ((rc = ddi_dma_alloc_handle(devinfo, 1741 &tx_buffer_dma_attr, DDI_DMA_SLEEP, 0, 1742 &xnfp->tx_pkt_info[i].dma_handle)) != DDI_SUCCESS) 1743 return (DDI_FAILURE); 1744 } 1745 1746 /* 1747 * Allocate page for the transmit descriptor ring. 
1748 */ 1749 if (ddi_dma_alloc_handle(devinfo, &ringbuf_dma_attr, 1750 DDI_DMA_SLEEP, 0, &xnfp->tx_ring_dma_handle) != DDI_SUCCESS) 1751 goto alloc_error; 1752 1753 if (ddi_dma_mem_alloc(xnfp->tx_ring_dma_handle, 1754 PAGESIZE, &accattr, DDI_DMA_CONSISTENT, 1755 DDI_DMA_SLEEP, 0, &rptr, &len, 1756 &xnfp->tx_ring_dma_acchandle) != DDI_SUCCESS) { 1757 ddi_dma_free_handle(&xnfp->tx_ring_dma_handle); 1758 xnfp->tx_ring_dma_handle = NULL; 1759 goto alloc_error; 1760 } 1761 1762 if ((rc = ddi_dma_addr_bind_handle(xnfp->tx_ring_dma_handle, NULL, 1763 rptr, PAGESIZE, DDI_DMA_RDWR | DDI_DMA_CONSISTENT, 1764 DDI_DMA_SLEEP, 0, &dma_cookie, &ncookies)) != DDI_DMA_MAPPED) { 1765 ddi_dma_mem_free(&xnfp->tx_ring_dma_acchandle); 1766 ddi_dma_free_handle(&xnfp->tx_ring_dma_handle); 1767 xnfp->tx_ring_dma_handle = NULL; 1768 xnfp->tx_ring_dma_acchandle = NULL; 1769 if (rc == DDI_DMA_NORESOURCES) 1770 goto alloc_error; 1771 else 1772 goto error; 1773 } 1774 1775 ASSERT(ncookies == 1); 1776 bzero(rptr, PAGESIZE); 1777 /* LINTED: constant in conditional context */ 1778 SHARED_RING_INIT((netif_tx_sring_t *)rptr); 1779 /* LINTED: constant in conditional context */ 1780 FRONT_RING_INIT(&xnfp->tx_ring, (netif_tx_sring_t *)rptr, PAGESIZE); 1781 xnfp->tx_ring_phys_addr = dma_cookie.dmac_laddress; 1782 1783 /* 1784 * Allocate page for the receive descriptor ring. 1785 */ 1786 if (ddi_dma_alloc_handle(devinfo, &ringbuf_dma_attr, 1787 DDI_DMA_SLEEP, 0, &xnfp->rx_ring_dma_handle) != DDI_SUCCESS) 1788 goto alloc_error; 1789 1790 if (ddi_dma_mem_alloc(xnfp->rx_ring_dma_handle, 1791 PAGESIZE, &accattr, DDI_DMA_CONSISTENT, 1792 DDI_DMA_SLEEP, 0, &rptr, &len, 1793 &xnfp->rx_ring_dma_acchandle) != DDI_SUCCESS) { 1794 ddi_dma_free_handle(&xnfp->rx_ring_dma_handle); 1795 xnfp->rx_ring_dma_handle = NULL; 1796 goto alloc_error; 1797 } 1798 1799 if ((rc = ddi_dma_addr_bind_handle(xnfp->rx_ring_dma_handle, NULL, 1800 rptr, PAGESIZE, DDI_DMA_RDWR | DDI_DMA_CONSISTENT, 1801 DDI_DMA_SLEEP, 0, &dma_cookie, &ncookies)) != DDI_DMA_MAPPED) { 1802 ddi_dma_mem_free(&xnfp->rx_ring_dma_acchandle); 1803 ddi_dma_free_handle(&xnfp->rx_ring_dma_handle); 1804 xnfp->rx_ring_dma_handle = NULL; 1805 xnfp->rx_ring_dma_acchandle = NULL; 1806 if (rc == DDI_DMA_NORESOURCES) 1807 goto alloc_error; 1808 else 1809 goto error; 1810 } 1811 1812 ASSERT(ncookies == 1); 1813 bzero(rptr, PAGESIZE); 1814 /* LINTED: constant in conditional context */ 1815 SHARED_RING_INIT((netif_rx_sring_t *)rptr); 1816 /* LINTED: constant in conditional context */ 1817 FRONT_RING_INIT(&xnfp->rx_ring, (netif_rx_sring_t *)rptr, PAGESIZE); 1818 xnfp->rx_ring_phys_addr = dma_cookie.dmac_laddress; 1819 1820 /* 1821 * Preallocate receive buffers for each receive descriptor. 1822 */ 1823 1824 /* Set up the "free list" of receive buffer descriptors */ 1825 for (i = 0; i < xnfp->n_recvs; i++) { 1826 if ((bdesc = xnf_alloc_buffer(xnfp)) == NULL) 1827 goto alloc_error; 1828 bdesc->next = xnfp->free_list; 1829 xnfp->free_list = bdesc; 1830 } 1831 1832 return (DDI_SUCCESS); 1833 1834 alloc_error: 1835 cmn_err(CE_WARN, "xnf%d: could not allocate enough DMA memory", 1836 ddi_get_instance(xnfp->devinfo)); 1837 error: 1838 xnf_release_dma_resources(xnfp); 1839 return (DDI_FAILURE); 1840 } 1841 1842 /* 1843 * Release all DMA resources in the opposite order from acquisition 1844 * Should not be called until all outstanding esballoc buffers 1845 * have been returned. 
1846 */ 1847 static void 1848 xnf_release_dma_resources(xnf_t *xnfp) 1849 { 1850 int i; 1851 1852 /* 1853 * Free receive buffers which are currently associated with 1854 * descriptors 1855 */ 1856 for (i = 0; i < xnfp->n_recvs; i++) { 1857 struct xnf_buffer_desc *bp; 1858 1859 if ((bp = xnfp->rxpkt_bufptr[i]) == NULL) 1860 continue; 1861 xnf_free_buffer(bp); 1862 xnfp->rxpkt_bufptr[i] = NULL; 1863 } 1864 1865 /* Free the receive ring buffer */ 1866 if (xnfp->rx_ring_dma_acchandle != NULL) { 1867 (void) ddi_dma_unbind_handle(xnfp->rx_ring_dma_handle); 1868 ddi_dma_mem_free(&xnfp->rx_ring_dma_acchandle); 1869 ddi_dma_free_handle(&xnfp->rx_ring_dma_handle); 1870 xnfp->rx_ring_dma_acchandle = NULL; 1871 } 1872 /* Free the transmit ring buffer */ 1873 if (xnfp->tx_ring_dma_acchandle != NULL) { 1874 (void) ddi_dma_unbind_handle(xnfp->tx_ring_dma_handle); 1875 ddi_dma_mem_free(&xnfp->tx_ring_dma_acchandle); 1876 ddi_dma_free_handle(&xnfp->tx_ring_dma_handle); 1877 xnfp->tx_ring_dma_acchandle = NULL; 1878 } 1879 } 1880 1881 static void 1882 xnf_release_mblks(xnf_t *xnfp) 1883 { 1884 int i; 1885 1886 for (i = 0; i < xnfp->n_xmits; i++) { 1887 if (xnfp->tx_pkt_info[i].mp == NULL) 1888 continue; 1889 freemsg(xnfp->tx_pkt_info[i].mp); 1890 xnfp->tx_pkt_info[i].mp = NULL; 1891 (void) ddi_dma_unbind_handle(xnfp->tx_pkt_info[i].dma_handle); 1892 } 1893 } 1894 1895 /* 1896 * Remove a xmit buffer descriptor from the head of the free list and return 1897 * a pointer to it. If no buffers on list, attempt to allocate a new one. 1898 * Called with the tx_buf_mutex held. 1899 */ 1900 static struct xnf_buffer_desc * 1901 xnf_get_xmit_buffer(xnf_t *xnfp) 1902 { 1903 struct xnf_buffer_desc *bdesc; 1904 1905 bdesc = xnfp->xmit_free_list; 1906 if (bdesc != NULL) { 1907 xnfp->xmit_free_list = bdesc->next; 1908 } else { 1909 bdesc = xnf_alloc_xmit_buffer(xnfp); 1910 } 1911 return (bdesc); 1912 } 1913 1914 /* 1915 * Remove a buffer descriptor from the head of the free list and return 1916 * a pointer to it. If no buffers on list, attempt to allocate a new one. 1917 * Called with the rx_buf_mutex held. 1918 */ 1919 static struct xnf_buffer_desc * 1920 xnf_get_buffer(xnf_t *xnfp) 1921 { 1922 struct xnf_buffer_desc *bdesc; 1923 1924 bdesc = xnfp->free_list; 1925 if (bdesc != NULL) { 1926 xnfp->free_list = bdesc->next; 1927 xnfp->rx_descs_free--; 1928 } else { 1929 bdesc = xnf_alloc_buffer(xnfp); 1930 } 1931 return (bdesc); 1932 } 1933 1934 /* 1935 * Free a xmit buffer back to the xmit free list 1936 */ 1937 static void 1938 xnf_free_xmit_buffer(struct xnf_buffer_desc *bp) 1939 { 1940 xnf_t *xnfp = bp->xnfp; 1941 1942 mutex_enter(&xnfp->tx_buf_mutex); 1943 bp->next = xnfp->xmit_free_list; 1944 xnfp->xmit_free_list = bp; 1945 mutex_exit(&xnfp->tx_buf_mutex); 1946 } 1947 1948 /* 1949 * Put a buffer descriptor onto the head of the free list. 1950 * We can't really free these buffers back to the kernel 1951 * since we have given away their backing page to be used 1952 * by the back end net driver. 1953 */ 1954 static void 1955 xnf_free_buffer(struct xnf_buffer_desc *bp) 1956 { 1957 xnf_t *xnfp = bp->xnfp; 1958 1959 mutex_enter(&xnfp->rx_buf_mutex); 1960 bp->next = xnfp->free_list; 1961 xnfp->free_list = bp; 1962 xnfp->rx_descs_free++; 1963 mutex_exit(&xnfp->rx_buf_mutex); 1964 } 1965 1966 /* 1967 * Allocate a DMA-able xmit buffer, including a structure to 1968 * keep track of the buffer. Called with tx_buf_mutex held. 
1969 */ 1970 static struct xnf_buffer_desc * 1971 xnf_alloc_xmit_buffer(xnf_t *xnfp) 1972 { 1973 struct xnf_buffer_desc *bdesc; 1974 size_t len; 1975 1976 if ((bdesc = kmem_zalloc(sizeof (*bdesc), KM_NOSLEEP)) == NULL) 1977 return (NULL); 1978 1979 /* allocate a DMA access handle for receive buffer */ 1980 if (ddi_dma_alloc_handle(xnfp->devinfo, &tx_buffer_dma_attr, 1981 0, 0, &bdesc->dma_handle) != DDI_SUCCESS) 1982 goto failure; 1983 1984 /* Allocate DMA-able memory for transmit buffer */ 1985 if (ddi_dma_mem_alloc(bdesc->dma_handle, 1986 PAGESIZE, &data_accattr, DDI_DMA_STREAMING, 0, 0, 1987 &bdesc->buf, &len, &bdesc->acc_handle) != DDI_SUCCESS) 1988 goto late_failure; 1989 1990 bdesc->xnfp = xnfp; 1991 xnfp->xmit_buffer_count++; 1992 1993 return (bdesc); 1994 1995 late_failure: 1996 ddi_dma_free_handle(&bdesc->dma_handle); 1997 1998 failure: 1999 kmem_free(bdesc, sizeof (*bdesc)); 2000 return (NULL); 2001 } 2002 2003 /* 2004 * Allocate a DMA-able receive buffer, including a structure to 2005 * keep track of the buffer. Called with rx_buf_mutex held. 2006 */ 2007 static struct xnf_buffer_desc * 2008 xnf_alloc_buffer(xnf_t *xnfp) 2009 { 2010 struct xnf_buffer_desc *bdesc; 2011 size_t len; 2012 uint_t ncookies; 2013 ddi_dma_cookie_t dma_cookie; 2014 long cnt; 2015 pfn_t pfn; 2016 2017 if (xnfp->recv_buffer_count >= xnfp->max_recv_bufs) 2018 return (NULL); 2019 2020 if ((bdesc = kmem_zalloc(sizeof (*bdesc), KM_NOSLEEP)) == NULL) 2021 return (NULL); 2022 2023 /* allocate a DMA access handle for receive buffer */ 2024 if (ddi_dma_alloc_handle(xnfp->devinfo, &rx_buffer_dma_attr, 2025 0, 0, &bdesc->dma_handle) != DDI_SUCCESS) 2026 goto failure; 2027 2028 /* Allocate DMA-able memory for receive buffer */ 2029 if (ddi_dma_mem_alloc(bdesc->dma_handle, 2030 PAGESIZE, &data_accattr, DDI_DMA_STREAMING, 0, 0, 2031 &bdesc->buf, &len, &bdesc->acc_handle) != DDI_SUCCESS) 2032 goto late_failure; 2033 2034 /* bind to virtual address of buffer to get physical address */ 2035 if (ddi_dma_addr_bind_handle(bdesc->dma_handle, NULL, 2036 bdesc->buf, PAGESIZE, DDI_DMA_READ | DDI_DMA_STREAMING, 2037 DDI_DMA_SLEEP, 0, &dma_cookie, &ncookies) != DDI_DMA_MAPPED) 2038 goto late_late_failure; 2039 2040 bdesc->buf_phys = dma_cookie.dmac_laddress; 2041 bdesc->xnfp = xnfp; 2042 bdesc->free_rtn.free_func = xnf_rcv_complete; 2043 bdesc->free_rtn.free_arg = (char *)bdesc; 2044 bdesc->grant_ref = GRANT_INVALID_REF; 2045 ASSERT(ncookies == 1); 2046 2047 xnfp->recv_buffer_count++; 2048 /* 2049 * Unmap the page, and hand the machine page back 2050 * to xen so it can be used as a backend net buffer. 
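	 * Giving the backing page away here is what makes the page-flip
	 * receive model work: for each received packet the backend
	 * transfers a machine frame back to us, and xnf_process_recv()
	 * plumbs it in under this same buffer via reassign_pfn() and
	 * hat_devload().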
2051 */ 2052 pfn = xnf_btop(bdesc->buf_phys); 2053 cnt = balloon_free_pages(1, NULL, bdesc->buf, &pfn); 2054 if (cnt != 1) { 2055 cmn_err(CE_WARN, "unable to give a page back to the " 2056 "hypervisor\n"); 2057 } 2058 2059 return (bdesc); 2060 2061 late_late_failure: 2062 ddi_dma_mem_free(&bdesc->acc_handle); 2063 2064 late_failure: 2065 ddi_dma_free_handle(&bdesc->dma_handle); 2066 2067 failure: 2068 kmem_free(bdesc, sizeof (*bdesc)); 2069 return (NULL); 2070 } 2071 2072 static int 2073 xnf_stat(void *arg, uint_t stat, uint64_t *val) 2074 { 2075 xnf_t *xnfp = arg; 2076 2077 mutex_enter(&xnfp->intrlock); 2078 mutex_enter(&xnfp->txlock); 2079 2080 #define map_stat(q, r) \ 2081 case (MAC_STAT_##q): \ 2082 *val = xnfp->stat_##r; \ 2083 break 2084 2085 switch (stat) { 2086 2087 map_stat(IPACKETS, ipackets); 2088 map_stat(OPACKETS, opackets); 2089 map_stat(RBYTES, rbytes); 2090 map_stat(OBYTES, obytes); 2091 map_stat(NORCVBUF, norcvbuf); 2092 map_stat(IERRORS, errrcv); 2093 map_stat(NOXMTBUF, xmit_defer); 2094 2095 default: 2096 mutex_exit(&xnfp->txlock); 2097 mutex_exit(&xnfp->intrlock); 2098 2099 return (ENOTSUP); 2100 } 2101 2102 #undef map_stat 2103 2104 mutex_exit(&xnfp->txlock); 2105 mutex_exit(&xnfp->intrlock); 2106 2107 return (0); 2108 } 2109 2110 /*ARGSUSED*/ 2111 static void 2112 xnf_blank(void *arg, time_t ticks, uint_t count) 2113 { 2114 /* 2115 * XXPV dme: blanking is not currently implemented. 2116 * 2117 * It's not obvious how to use the 'ticks' argument here. 2118 * 2119 * 'Count' might be used as an indicator of how to set 2120 * rsp_event when posting receive buffers to the rx_ring. It 2121 * would replace the code at the tail of xnf_process_recv() 2122 * that simply indicates that the next completed packet should 2123 * cause an interrupt. 2124 */ 2125 } 2126 2127 static void 2128 xnf_resources(void *arg) 2129 { 2130 xnf_t *xnfp = arg; 2131 mac_rx_fifo_t mrf; 2132 2133 mrf.mrf_type = MAC_RX_FIFO; 2134 mrf.mrf_blank = xnf_blank; 2135 mrf.mrf_arg = (void *)xnfp; 2136 mrf.mrf_normal_blank_time = 128; /* XXPV dme: see xnf_blank() */ 2137 mrf.mrf_normal_pkt_count = 8; /* XXPV dme: see xnf_blank() */ 2138 2139 xnfp->rx_handle = mac_resource_add(xnfp->mh, 2140 (mac_resource_t *)&mrf); 2141 } 2142 2143 /*ARGSUSED*/ 2144 static void 2145 xnf_ioctl(void *arg, queue_t *q, mblk_t *mp) 2146 { 2147 miocnak(q, mp, 0, EINVAL); 2148 } 2149 2150 static boolean_t 2151 xnf_getcapab(void *arg, mac_capab_t cap, void *cap_data) 2152 { 2153 xnf_t *xnfp = arg; 2154 2155 switch (cap) { 2156 case MAC_CAPAB_HCKSUM: { 2157 uint32_t *capab = cap_data; 2158 2159 if (xnfp->cksum_offload) 2160 *capab = HCKSUM_INET_FULL_V4; 2161 else 2162 *capab = 0; 2163 break; 2164 } 2165 2166 case MAC_CAPAB_POLL: 2167 /* Just return B_TRUE. */ 2168 break; 2169 2170 default: 2171 return (B_FALSE); 2172 } 2173 2174 return (B_TRUE); 2175 } 2176 2177 /*ARGSUSED*/ 2178 static void 2179 oe_state_change(dev_info_t *dip, ddi_eventcookie_t id, 2180 void *arg, void *impl_data) 2181 { 2182 xnf_t *xnfp = ddi_get_driver_private(dip); 2183 XenbusState new_state = *(XenbusState *)impl_data; 2184 2185 ASSERT(xnfp != NULL); 2186 2187 switch (new_state) { 2188 case XenbusStateConnected: 2189 mutex_enter(&xnfp->intrlock); 2190 mutex_enter(&xnfp->txlock); 2191 2192 xnfp->connected = B_TRUE; 2193 cv_broadcast(&xnfp->cv); 2194 2195 mutex_exit(&xnfp->txlock); 2196 mutex_exit(&xnfp->intrlock); 2197 2198 ec_notify_via_evtchn(xnfp->evtchn); 2199 break; 2200 2201 default: 2202 break; 2203 } 2204 } 2205
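
/*
 * Tuning note (illustrative): the patchable globals near the top of
 * this file (xnf_cksum_offload, xnf_tx_pages_readonly,
 * xnf_rx_bcopy_thresh) can, for example, be overridden at boot time
 * via /etc/system:
 *
 *	set xnf:xnf_cksum_offload = 0
 *	set xnf:xnf_rx_bcopy_thresh = 1600
 *
 * (the latter exceeds XNF_FRAMESIZE and so forces the receive path to
 * always copy), or patched at run time with mdb -kw.
 */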