1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2004-2006 Kip Macy 5 * Copyright (c) 2015 Wei Liu <wei.liu2@citrix.com> 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 */ 29 30 #include <sys/cdefs.h> 31 __FBSDID("$FreeBSD$"); 32 33 #include "opt_inet.h" 34 #include "opt_inet6.h" 35 36 #include <sys/param.h> 37 #include <sys/sockio.h> 38 #include <sys/limits.h> 39 #include <sys/mbuf.h> 40 #include <sys/malloc.h> 41 #include <sys/module.h> 42 #include <sys/kernel.h> 43 #include <sys/socket.h> 44 #include <sys/sysctl.h> 45 #include <sys/taskqueue.h> 46 47 #include <net/if.h> 48 #include <net/if_var.h> 49 #include <net/if_arp.h> 50 #include <net/ethernet.h> 51 #include <net/if_media.h> 52 #include <net/bpf.h> 53 #include <net/if_types.h> 54 55 #include <netinet/in.h> 56 #include <netinet/ip.h> 57 #include <netinet/if_ether.h> 58 #include <netinet/tcp.h> 59 #include <netinet/tcp_lro.h> 60 61 #include <vm/vm.h> 62 #include <vm/pmap.h> 63 64 #include <sys/bus.h> 65 66 #include <xen/xen-os.h> 67 #include <xen/hypervisor.h> 68 #include <xen/xen_intr.h> 69 #include <xen/gnttab.h> 70 #include <contrib/xen/memory.h> 71 #include <contrib/xen/io/netif.h> 72 #include <xen/xenbus/xenbusvar.h> 73 74 #include <machine/bus.h> 75 76 #include "xenbus_if.h" 77 78 /* Features supported by all backends. TSO and LRO can be negotiated */ 79 #define XN_CSUM_FEATURES (CSUM_TCP | CSUM_UDP) 80 81 #define NET_TX_RING_SIZE __CONST_RING_SIZE(netif_tx, PAGE_SIZE) 82 #define NET_RX_RING_SIZE __CONST_RING_SIZE(netif_rx, PAGE_SIZE) 83 84 #define NET_RX_SLOTS_MIN (XEN_NETIF_NR_SLOTS_MIN + 1) 85 86 /* 87 * Should the driver do LRO on the RX end 88 * this can be toggled on the fly, but the 89 * interface must be reset (down/up) for it 90 * to take effect. 91 */ 92 static int xn_enable_lro = 1; 93 TUNABLE_INT("hw.xn.enable_lro", &xn_enable_lro); 94 95 /* 96 * Number of pairs of queues. 97 */ 98 static unsigned long xn_num_queues = 4; 99 TUNABLE_ULONG("hw.xn.num_queues", &xn_num_queues); 100 101 /** 102 * \brief The maximum allowed data fragments in a single transmit 103 * request. 104 * 105 * This limit is imposed by the backend driver. 
We assume here that 106 * we are dealing with a Linux driver domain and have set our limit 107 * to mirror the Linux MAX_SKB_FRAGS constant. 108 */ 109 #define MAX_TX_REQ_FRAGS (65536 / PAGE_SIZE + 2) 110 111 #define RX_COPY_THRESHOLD 256 112 113 #define net_ratelimit() 0 114 115 struct netfront_rxq; 116 struct netfront_txq; 117 struct netfront_info; 118 struct netfront_rx_info; 119 120 static void xn_txeof(struct netfront_txq *); 121 static void xn_rxeof(struct netfront_rxq *); 122 static void xn_alloc_rx_buffers(struct netfront_rxq *); 123 static void xn_alloc_rx_buffers_callout(void *arg); 124 125 static void xn_release_rx_bufs(struct netfront_rxq *); 126 static void xn_release_tx_bufs(struct netfront_txq *); 127 128 static void xn_rxq_intr(struct netfront_rxq *); 129 static void xn_txq_intr(struct netfront_txq *); 130 static void xn_intr(void *); 131 static int xn_assemble_tx_request(struct netfront_txq *, struct mbuf *); 132 static int xn_ioctl(struct ifnet *, u_long, caddr_t); 133 static void xn_ifinit_locked(struct netfront_info *); 134 static void xn_ifinit(void *); 135 static void xn_stop(struct netfront_info *); 136 static void xn_query_features(struct netfront_info *np); 137 static int xn_configure_features(struct netfront_info *np); 138 static void netif_free(struct netfront_info *info); 139 static int netfront_detach(device_t dev); 140 141 static int xn_txq_mq_start_locked(struct netfront_txq *, struct mbuf *); 142 static int xn_txq_mq_start(struct ifnet *, struct mbuf *); 143 144 static int talk_to_backend(device_t dev, struct netfront_info *info); 145 static int create_netdev(device_t dev); 146 static void netif_disconnect_backend(struct netfront_info *info); 147 static int setup_device(device_t dev, struct netfront_info *info, 148 unsigned long); 149 static int xn_ifmedia_upd(struct ifnet *ifp); 150 static void xn_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr); 151 152 static int xn_connect(struct netfront_info *); 153 static void xn_kick_rings(struct netfront_info *); 154 155 static int xn_get_responses(struct netfront_rxq *, 156 struct netfront_rx_info *, RING_IDX, RING_IDX *, 157 struct mbuf **); 158 159 #define virt_to_mfn(x) (vtophys(x) >> PAGE_SHIFT) 160 161 #define INVALID_P2M_ENTRY (~0UL) 162 #define XN_QUEUE_NAME_LEN 8 /* xn{t,r}x_%u, allow for two digits */ 163 struct netfront_rxq { 164 struct netfront_info *info; 165 u_int id; 166 char name[XN_QUEUE_NAME_LEN]; 167 struct mtx lock; 168 169 int ring_ref; 170 netif_rx_front_ring_t ring; 171 xen_intr_handle_t xen_intr_handle; 172 173 grant_ref_t gref_head; 174 grant_ref_t grant_ref[NET_RX_RING_SIZE + 1]; 175 176 struct mbuf *mbufs[NET_RX_RING_SIZE + 1]; 177 178 struct lro_ctrl lro; 179 180 struct callout rx_refill; 181 }; 182 183 struct netfront_txq { 184 struct netfront_info *info; 185 u_int id; 186 char name[XN_QUEUE_NAME_LEN]; 187 struct mtx lock; 188 189 int ring_ref; 190 netif_tx_front_ring_t ring; 191 xen_intr_handle_t xen_intr_handle; 192 193 grant_ref_t gref_head; 194 grant_ref_t grant_ref[NET_TX_RING_SIZE + 1]; 195 196 struct mbuf *mbufs[NET_TX_RING_SIZE + 1]; 197 int mbufs_cnt; 198 struct buf_ring *br; 199 200 struct taskqueue *tq; 201 struct task defrtask; 202 203 bus_dma_segment_t segs[MAX_TX_REQ_FRAGS]; 204 struct mbuf_xennet { 205 struct m_tag tag; 206 bus_dma_tag_t dma_tag; 207 bus_dmamap_t dma_map; 208 struct netfront_txq *txq; 209 SLIST_ENTRY(mbuf_xennet) next; 210 u_int count; 211 } xennet_tag[NET_TX_RING_SIZE + 1]; 212 SLIST_HEAD(, mbuf_xennet) tags; 213 214 bool full; 215 }; 216 217 
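/*
 * Per-interface (vif) state.  rxq and txq point to arrays of num_queues
 * receive/transmit queue pairs, each protected by its own lock; sc_lock
 * guards the remaining interface-wide fields.
 */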
struct netfront_info { 218 struct ifnet *xn_ifp; 219 220 struct mtx sc_lock; 221 222 u_int num_queues; 223 struct netfront_rxq *rxq; 224 struct netfront_txq *txq; 225 226 u_int carrier; 227 u_int maxfrags; 228 229 device_t xbdev; 230 uint8_t mac[ETHER_ADDR_LEN]; 231 232 int xn_if_flags; 233 234 struct ifmedia sc_media; 235 236 bus_dma_tag_t dma_tag; 237 238 bool xn_reset; 239 }; 240 241 struct netfront_rx_info { 242 struct netif_rx_response rx; 243 struct netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1]; 244 }; 245 246 #define XN_RX_LOCK(_q) mtx_lock(&(_q)->lock) 247 #define XN_RX_UNLOCK(_q) mtx_unlock(&(_q)->lock) 248 249 #define XN_TX_LOCK(_q) mtx_lock(&(_q)->lock) 250 #define XN_TX_TRYLOCK(_q) mtx_trylock(&(_q)->lock) 251 #define XN_TX_UNLOCK(_q) mtx_unlock(&(_q)->lock) 252 253 #define XN_LOCK(_sc) mtx_lock(&(_sc)->sc_lock); 254 #define XN_UNLOCK(_sc) mtx_unlock(&(_sc)->sc_lock); 255 256 #define XN_LOCK_ASSERT(_sc) mtx_assert(&(_sc)->sc_lock, MA_OWNED); 257 #define XN_RX_LOCK_ASSERT(_q) mtx_assert(&(_q)->lock, MA_OWNED); 258 #define XN_TX_LOCK_ASSERT(_q) mtx_assert(&(_q)->lock, MA_OWNED); 259 260 #define netfront_carrier_on(netif) ((netif)->carrier = 1) 261 #define netfront_carrier_off(netif) ((netif)->carrier = 0) 262 #define netfront_carrier_ok(netif) ((netif)->carrier) 263 264 /* Access macros for acquiring freeing slots in xn_free_{tx,rx}_idxs[]. */ 265 266 static inline void 267 add_id_to_freelist(struct mbuf **list, uintptr_t id) 268 { 269 270 KASSERT(id != 0, 271 ("%s: the head item (0) must always be free.", __func__)); 272 list[id] = list[0]; 273 list[0] = (struct mbuf *)id; 274 } 275 276 static inline unsigned short 277 get_id_from_freelist(struct mbuf **list) 278 { 279 uintptr_t id; 280 281 id = (uintptr_t)list[0]; 282 KASSERT(id != 0, 283 ("%s: the head item (0) must always remain free.", __func__)); 284 list[0] = list[id]; 285 return (id); 286 } 287 288 static inline int 289 xn_rxidx(RING_IDX idx) 290 { 291 292 return idx & (NET_RX_RING_SIZE - 1); 293 } 294 295 static inline struct mbuf * 296 xn_get_rx_mbuf(struct netfront_rxq *rxq, RING_IDX ri) 297 { 298 int i; 299 struct mbuf *m; 300 301 i = xn_rxidx(ri); 302 m = rxq->mbufs[i]; 303 rxq->mbufs[i] = NULL; 304 return (m); 305 } 306 307 static inline grant_ref_t 308 xn_get_rx_ref(struct netfront_rxq *rxq, RING_IDX ri) 309 { 310 int i = xn_rxidx(ri); 311 grant_ref_t ref = rxq->grant_ref[i]; 312 313 KASSERT(ref != GRANT_REF_INVALID, ("Invalid grant reference!\n")); 314 rxq->grant_ref[i] = GRANT_REF_INVALID; 315 return (ref); 316 } 317 318 #define MTAG_COOKIE 1218492000 319 #define MTAG_XENNET 0 320 321 static void mbuf_grab(struct mbuf *m) 322 { 323 struct mbuf_xennet *ref; 324 325 ref = (struct mbuf_xennet *)m_tag_locate(m, MTAG_COOKIE, 326 MTAG_XENNET, NULL); 327 KASSERT(ref != NULL, ("Cannot find refcount")); 328 ref->count++; 329 } 330 331 static void mbuf_release(struct mbuf *m) 332 { 333 struct mbuf_xennet *ref; 334 335 ref = (struct mbuf_xennet *)m_tag_locate(m, MTAG_COOKIE, 336 MTAG_XENNET, NULL); 337 KASSERT(ref != NULL, ("Cannot find refcount")); 338 KASSERT(ref->count > 0, ("Invalid reference count")); 339 340 if (--ref->count == 0) 341 m_freem(m); 342 } 343 344 static void tag_free(struct m_tag *t) 345 { 346 struct mbuf_xennet *ref = (struct mbuf_xennet *)t; 347 348 KASSERT(ref->count == 0, ("Free mbuf tag with pending refcnt")); 349 bus_dmamap_sync(ref->dma_tag, ref->dma_map, BUS_DMASYNC_POSTWRITE); 350 bus_dmamap_destroy(ref->dma_tag, ref->dma_map); 351 SLIST_INSERT_HEAD(&ref->txq->tags, ref, next); 352 } 353 
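/*
 * Transmit mbufs are tracked with the mbuf_xennet m_tag defined in struct
 * netfront_txq above: mbuf_grab() takes a reference for every ring slot the
 * mbuf occupies and mbuf_release() drops one per completed response.  The
 * final release frees the mbuf, which runs tag_free() to sync and destroy
 * the DMA map and return the tag to the queue's free list.
 */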
#define IPRINTK(fmt, args...) \
	printf("[XEN] " fmt, ##args)
#ifdef INVARIANTS
#define WPRINTK(fmt, args...) \
	printf("[XEN] " fmt, ##args)
#else
#define WPRINTK(fmt, args...)
#endif
#ifdef DEBUG
#define DPRINTK(fmt, args...) \
	printf("[XEN] %s: " fmt, __func__, ##args)
#else
#define DPRINTK(fmt, args...)
#endif

/**
 * Read the 'mac' node at the given device's node in the store, and parse it
 * as colon-separated octets, placing the result in the given mac array.
 * mac must be a preallocated array of length ETHER_ADDR_LEN.
 * Return 0 on success, or errno on error.
 */
static int
xen_net_read_mac(device_t dev, uint8_t mac[])
{
	int error, i;
	char *s, *e, *macstr;
	const char *path;

	path = xenbus_get_node(dev);
	error = xs_read(XST_NIL, path, "mac", NULL, (void **) &macstr);
	if (error == ENOENT) {
		/*
		 * Deal with missing mac XenStore nodes on devices with
		 * HVM emulation (the 'ioemu' configuration attribute)
		 * enabled.
		 *
		 * The HVM emulator may execute in a stub device model
		 * domain which lacks the permission, only given to Dom0,
		 * to update the guest's XenStore tree.  For this reason,
		 * the HVM emulator doesn't even attempt to write the
		 * front-side mac node, even when operating in Dom0.
		 * However, there should always be a mac listed in the
		 * backend tree.  Fall back to that version if our query
		 * of the front side XenStore location doesn't find
		 * anything.
		 */
		path = xenbus_get_otherend_path(dev);
		error = xs_read(XST_NIL, path, "mac", NULL, (void **) &macstr);
	}
	if (error != 0) {
		xenbus_dev_fatal(dev, error, "parsing %s/mac", path);
		return (error);
	}

	s = macstr;
	for (i = 0; i < ETHER_ADDR_LEN; i++) {
		mac[i] = strtoul(s, &e, 16);
		if (s == e || (e[0] != ':' && e[0] != 0)) {
			free(macstr, M_XENBUS);
			return (ENOENT);
		}
		s = &e[1];
	}
	free(macstr, M_XENBUS);
	return (0);
}

/**
 * Entry point to this code when a new device is created.  Allocate the basic
 * structures and the ring buffers for communication with the backend, and
 * inform the backend of the appropriate details for those.  Switch to
 * Connected state.
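 *
 * (The allocation and backend handshake actually happen in netfront_attach()
 * and talk_to_backend(); netfront_probe() below only matches the "vif"
 * device type.)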
426 */ 427 static int 428 netfront_probe(device_t dev) 429 { 430 431 if (xen_pv_nics_disabled()) 432 return (ENXIO); 433 434 if (!strcmp(xenbus_get_type(dev), "vif")) { 435 device_set_desc(dev, "Virtual Network Interface"); 436 return (0); 437 } 438 439 return (ENXIO); 440 } 441 442 static int 443 netfront_attach(device_t dev) 444 { 445 int err; 446 447 err = create_netdev(dev); 448 if (err != 0) { 449 xenbus_dev_fatal(dev, err, "creating netdev"); 450 return (err); 451 } 452 453 SYSCTL_ADD_INT(device_get_sysctl_ctx(dev), 454 SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), 455 OID_AUTO, "enable_lro", CTLFLAG_RW, 456 &xn_enable_lro, 0, "Large Receive Offload"); 457 458 SYSCTL_ADD_ULONG(device_get_sysctl_ctx(dev), 459 SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), 460 OID_AUTO, "num_queues", CTLFLAG_RD, 461 &xn_num_queues, "Number of pairs of queues"); 462 463 return (0); 464 } 465 466 static int 467 netfront_suspend(device_t dev) 468 { 469 struct netfront_info *np = device_get_softc(dev); 470 u_int i; 471 472 for (i = 0; i < np->num_queues; i++) { 473 XN_RX_LOCK(&np->rxq[i]); 474 XN_TX_LOCK(&np->txq[i]); 475 } 476 netfront_carrier_off(np); 477 for (i = 0; i < np->num_queues; i++) { 478 XN_RX_UNLOCK(&np->rxq[i]); 479 XN_TX_UNLOCK(&np->txq[i]); 480 } 481 return (0); 482 } 483 484 /** 485 * We are reconnecting to the backend, due to a suspend/resume, or a backend 486 * driver restart. We tear down our netif structure and recreate it, but 487 * leave the device-layer structures intact so that this is transparent to the 488 * rest of the kernel. 489 */ 490 static int 491 netfront_resume(device_t dev) 492 { 493 struct netfront_info *info = device_get_softc(dev); 494 u_int i; 495 496 if (xen_suspend_cancelled) { 497 for (i = 0; i < info->num_queues; i++) { 498 XN_RX_LOCK(&info->rxq[i]); 499 XN_TX_LOCK(&info->txq[i]); 500 } 501 netfront_carrier_on(info); 502 for (i = 0; i < info->num_queues; i++) { 503 XN_RX_UNLOCK(&info->rxq[i]); 504 XN_TX_UNLOCK(&info->txq[i]); 505 } 506 return (0); 507 } 508 509 netif_disconnect_backend(info); 510 return (0); 511 } 512 513 static int 514 write_queue_xenstore_keys(device_t dev, 515 struct netfront_rxq *rxq, 516 struct netfront_txq *txq, 517 struct xs_transaction *xst, bool hierarchy) 518 { 519 int err; 520 const char *message; 521 const char *node = xenbus_get_node(dev); 522 char *path; 523 size_t path_size; 524 525 KASSERT(rxq->id == txq->id, ("Mismatch between RX and TX queue ids")); 526 /* Split event channel support is not yet there. 
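	 * (Each RX/TX queue pair shares a single event channel: it is bound in
	 * setup_txqs() and copied to the corresponding rxq in setup_device().)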
*/ 527 KASSERT(rxq->xen_intr_handle == txq->xen_intr_handle, 528 ("Split event channels are not supported")); 529 530 if (hierarchy) { 531 path_size = strlen(node) + 10; 532 path = malloc(path_size, M_DEVBUF, M_WAITOK|M_ZERO); 533 snprintf(path, path_size, "%s/queue-%u", node, rxq->id); 534 } else { 535 path_size = strlen(node) + 1; 536 path = malloc(path_size, M_DEVBUF, M_WAITOK|M_ZERO); 537 snprintf(path, path_size, "%s", node); 538 } 539 540 err = xs_printf(*xst, path, "tx-ring-ref","%u", txq->ring_ref); 541 if (err != 0) { 542 message = "writing tx ring-ref"; 543 goto error; 544 } 545 err = xs_printf(*xst, path, "rx-ring-ref","%u", rxq->ring_ref); 546 if (err != 0) { 547 message = "writing rx ring-ref"; 548 goto error; 549 } 550 err = xs_printf(*xst, path, "event-channel", "%u", 551 xen_intr_port(rxq->xen_intr_handle)); 552 if (err != 0) { 553 message = "writing event-channel"; 554 goto error; 555 } 556 557 free(path, M_DEVBUF); 558 559 return (0); 560 561 error: 562 free(path, M_DEVBUF); 563 xenbus_dev_fatal(dev, err, "%s", message); 564 565 return (err); 566 } 567 568 /* Common code used when first setting up, and when resuming. */ 569 static int 570 talk_to_backend(device_t dev, struct netfront_info *info) 571 { 572 const char *message; 573 struct xs_transaction xst; 574 const char *node = xenbus_get_node(dev); 575 int err; 576 unsigned long num_queues, max_queues = 0; 577 unsigned int i; 578 579 err = xen_net_read_mac(dev, info->mac); 580 if (err != 0) { 581 xenbus_dev_fatal(dev, err, "parsing %s/mac", node); 582 goto out; 583 } 584 585 err = xs_scanf(XST_NIL, xenbus_get_otherend_path(info->xbdev), 586 "multi-queue-max-queues", NULL, "%lu", &max_queues); 587 if (err != 0) 588 max_queues = 1; 589 num_queues = xn_num_queues; 590 if (num_queues > max_queues) 591 num_queues = max_queues; 592 593 err = setup_device(dev, info, num_queues); 594 if (err != 0) 595 goto out; 596 597 again: 598 err = xs_transaction_start(&xst); 599 if (err != 0) { 600 xenbus_dev_fatal(dev, err, "starting transaction"); 601 goto free; 602 } 603 604 if (info->num_queues == 1) { 605 err = write_queue_xenstore_keys(dev, &info->rxq[0], 606 &info->txq[0], &xst, false); 607 if (err != 0) 608 goto abort_transaction_no_def_error; 609 } else { 610 err = xs_printf(xst, node, "multi-queue-num-queues", 611 "%u", info->num_queues); 612 if (err != 0) { 613 message = "writing multi-queue-num-queues"; 614 goto abort_transaction; 615 } 616 617 for (i = 0; i < info->num_queues; i++) { 618 err = write_queue_xenstore_keys(dev, &info->rxq[i], 619 &info->txq[i], &xst, true); 620 if (err != 0) 621 goto abort_transaction_no_def_error; 622 } 623 } 624 625 err = xs_printf(xst, node, "request-rx-copy", "%u", 1); 626 if (err != 0) { 627 message = "writing request-rx-copy"; 628 goto abort_transaction; 629 } 630 err = xs_printf(xst, node, "feature-rx-notify", "%d", 1); 631 if (err != 0) { 632 message = "writing feature-rx-notify"; 633 goto abort_transaction; 634 } 635 err = xs_printf(xst, node, "feature-sg", "%d", 1); 636 if (err != 0) { 637 message = "writing feature-sg"; 638 goto abort_transaction; 639 } 640 if ((info->xn_ifp->if_capenable & IFCAP_LRO) != 0) { 641 err = xs_printf(xst, node, "feature-gso-tcpv4", "%d", 1); 642 if (err != 0) { 643 message = "writing feature-gso-tcpv4"; 644 goto abort_transaction; 645 } 646 } 647 if ((info->xn_ifp->if_capenable & IFCAP_RXCSUM) == 0) { 648 err = xs_printf(xst, node, "feature-no-csum-offload", "%d", 1); 649 if (err != 0) { 650 message = "writing feature-no-csum-offload"; 651 goto 
abort_transaction; 652 } 653 } 654 655 err = xs_transaction_end(xst, 0); 656 if (err != 0) { 657 if (err == EAGAIN) 658 goto again; 659 xenbus_dev_fatal(dev, err, "completing transaction"); 660 goto free; 661 } 662 663 return 0; 664 665 abort_transaction: 666 xenbus_dev_fatal(dev, err, "%s", message); 667 abort_transaction_no_def_error: 668 xs_transaction_end(xst, 1); 669 free: 670 netif_free(info); 671 out: 672 return (err); 673 } 674 675 static void 676 xn_rxq_intr(struct netfront_rxq *rxq) 677 { 678 679 XN_RX_LOCK(rxq); 680 xn_rxeof(rxq); 681 XN_RX_UNLOCK(rxq); 682 } 683 684 static void 685 xn_txq_start(struct netfront_txq *txq) 686 { 687 struct netfront_info *np = txq->info; 688 struct ifnet *ifp = np->xn_ifp; 689 690 XN_TX_LOCK_ASSERT(txq); 691 if (!drbr_empty(ifp, txq->br)) 692 xn_txq_mq_start_locked(txq, NULL); 693 } 694 695 static void 696 xn_txq_intr(struct netfront_txq *txq) 697 { 698 699 XN_TX_LOCK(txq); 700 if (RING_HAS_UNCONSUMED_RESPONSES(&txq->ring)) 701 xn_txeof(txq); 702 xn_txq_start(txq); 703 XN_TX_UNLOCK(txq); 704 } 705 706 static void 707 xn_txq_tq_deferred(void *xtxq, int pending) 708 { 709 struct netfront_txq *txq = xtxq; 710 711 XN_TX_LOCK(txq); 712 xn_txq_start(txq); 713 XN_TX_UNLOCK(txq); 714 } 715 716 static void 717 disconnect_rxq(struct netfront_rxq *rxq) 718 { 719 720 xn_release_rx_bufs(rxq); 721 gnttab_free_grant_references(rxq->gref_head); 722 gnttab_end_foreign_access(rxq->ring_ref, NULL); 723 /* 724 * No split event channel support at the moment, handle will 725 * be unbound in tx. So no need to call xen_intr_unbind here, 726 * but we do want to reset the handler to 0. 727 */ 728 rxq->xen_intr_handle = 0; 729 } 730 731 static void 732 destroy_rxq(struct netfront_rxq *rxq) 733 { 734 735 callout_drain(&rxq->rx_refill); 736 free(rxq->ring.sring, M_DEVBUF); 737 } 738 739 static void 740 destroy_rxqs(struct netfront_info *np) 741 { 742 int i; 743 744 for (i = 0; i < np->num_queues; i++) 745 destroy_rxq(&np->rxq[i]); 746 747 free(np->rxq, M_DEVBUF); 748 np->rxq = NULL; 749 } 750 751 static int 752 setup_rxqs(device_t dev, struct netfront_info *info, 753 unsigned long num_queues) 754 { 755 int q, i; 756 int error; 757 netif_rx_sring_t *rxs; 758 struct netfront_rxq *rxq; 759 760 info->rxq = malloc(sizeof(struct netfront_rxq) * num_queues, 761 M_DEVBUF, M_WAITOK|M_ZERO); 762 763 for (q = 0; q < num_queues; q++) { 764 rxq = &info->rxq[q]; 765 766 rxq->id = q; 767 rxq->info = info; 768 rxq->ring_ref = GRANT_REF_INVALID; 769 rxq->ring.sring = NULL; 770 snprintf(rxq->name, XN_QUEUE_NAME_LEN, "xnrx_%u", q); 771 mtx_init(&rxq->lock, rxq->name, "netfront receive lock", 772 MTX_DEF); 773 774 for (i = 0; i <= NET_RX_RING_SIZE; i++) { 775 rxq->mbufs[i] = NULL; 776 rxq->grant_ref[i] = GRANT_REF_INVALID; 777 } 778 779 /* Start resources allocation */ 780 781 if (gnttab_alloc_grant_references(NET_RX_RING_SIZE, 782 &rxq->gref_head) != 0) { 783 device_printf(dev, "allocating rx gref"); 784 error = ENOMEM; 785 goto fail; 786 } 787 788 rxs = (netif_rx_sring_t *)malloc(PAGE_SIZE, M_DEVBUF, 789 M_WAITOK|M_ZERO); 790 SHARED_RING_INIT(rxs); 791 FRONT_RING_INIT(&rxq->ring, rxs, PAGE_SIZE); 792 793 error = xenbus_grant_ring(dev, virt_to_mfn(rxs), 794 &rxq->ring_ref); 795 if (error != 0) { 796 device_printf(dev, "granting rx ring page"); 797 goto fail_grant_ring; 798 } 799 800 callout_init(&rxq->rx_refill, 1); 801 } 802 803 return (0); 804 805 fail_grant_ring: 806 gnttab_free_grant_references(rxq->gref_head); 807 free(rxq->ring.sring, M_DEVBUF); 808 fail: 809 for (; q >= 0; q--) { 810 
disconnect_rxq(&info->rxq[q]); 811 destroy_rxq(&info->rxq[q]); 812 } 813 814 free(info->rxq, M_DEVBUF); 815 return (error); 816 } 817 818 static void 819 disconnect_txq(struct netfront_txq *txq) 820 { 821 822 xn_release_tx_bufs(txq); 823 gnttab_free_grant_references(txq->gref_head); 824 gnttab_end_foreign_access(txq->ring_ref, NULL); 825 xen_intr_unbind(&txq->xen_intr_handle); 826 } 827 828 static void 829 destroy_txq(struct netfront_txq *txq) 830 { 831 unsigned int i; 832 833 free(txq->ring.sring, M_DEVBUF); 834 buf_ring_free(txq->br, M_DEVBUF); 835 taskqueue_drain_all(txq->tq); 836 taskqueue_free(txq->tq); 837 838 for (i = 0; i <= NET_TX_RING_SIZE; i++) { 839 bus_dmamap_destroy(txq->info->dma_tag, 840 txq->xennet_tag[i].dma_map); 841 txq->xennet_tag[i].dma_map = NULL; 842 } 843 } 844 845 static void 846 destroy_txqs(struct netfront_info *np) 847 { 848 int i; 849 850 for (i = 0; i < np->num_queues; i++) 851 destroy_txq(&np->txq[i]); 852 853 free(np->txq, M_DEVBUF); 854 np->txq = NULL; 855 } 856 857 static int 858 setup_txqs(device_t dev, struct netfront_info *info, 859 unsigned long num_queues) 860 { 861 int q, i; 862 int error; 863 netif_tx_sring_t *txs; 864 struct netfront_txq *txq; 865 866 info->txq = malloc(sizeof(struct netfront_txq) * num_queues, 867 M_DEVBUF, M_WAITOK|M_ZERO); 868 869 for (q = 0; q < num_queues; q++) { 870 txq = &info->txq[q]; 871 872 txq->id = q; 873 txq->info = info; 874 875 txq->ring_ref = GRANT_REF_INVALID; 876 txq->ring.sring = NULL; 877 878 snprintf(txq->name, XN_QUEUE_NAME_LEN, "xntx_%u", q); 879 880 mtx_init(&txq->lock, txq->name, "netfront transmit lock", 881 MTX_DEF); 882 SLIST_INIT(&txq->tags); 883 884 for (i = 0; i <= NET_TX_RING_SIZE; i++) { 885 txq->mbufs[i] = (void *) ((u_long) i+1); 886 txq->grant_ref[i] = GRANT_REF_INVALID; 887 txq->xennet_tag[i].txq = txq; 888 txq->xennet_tag[i].dma_tag = info->dma_tag; 889 error = bus_dmamap_create(info->dma_tag, 0, 890 &txq->xennet_tag[i].dma_map); 891 if (error != 0) { 892 device_printf(dev, 893 "failed to allocate dma map\n"); 894 goto fail; 895 } 896 m_tag_setup(&txq->xennet_tag[i].tag, 897 MTAG_COOKIE, MTAG_XENNET, 898 sizeof(txq->xennet_tag[i]) - 899 sizeof(txq->xennet_tag[i].tag)); 900 txq->xennet_tag[i].tag.m_tag_free = &tag_free; 901 SLIST_INSERT_HEAD(&txq->tags, &txq->xennet_tag[i], 902 next); 903 } 904 txq->mbufs[NET_TX_RING_SIZE] = (void *)0; 905 906 /* Start resources allocation. 
*/ 907 908 if (gnttab_alloc_grant_references(NET_TX_RING_SIZE, 909 &txq->gref_head) != 0) { 910 device_printf(dev, "failed to allocate tx grant refs\n"); 911 error = ENOMEM; 912 goto fail; 913 } 914 915 txs = (netif_tx_sring_t *)malloc(PAGE_SIZE, M_DEVBUF, 916 M_WAITOK|M_ZERO); 917 SHARED_RING_INIT(txs); 918 FRONT_RING_INIT(&txq->ring, txs, PAGE_SIZE); 919 920 error = xenbus_grant_ring(dev, virt_to_mfn(txs), 921 &txq->ring_ref); 922 if (error != 0) { 923 device_printf(dev, "failed to grant tx ring\n"); 924 goto fail_grant_ring; 925 } 926 927 txq->br = buf_ring_alloc(NET_TX_RING_SIZE, M_DEVBUF, 928 M_WAITOK, &txq->lock); 929 TASK_INIT(&txq->defrtask, 0, xn_txq_tq_deferred, txq); 930 931 txq->tq = taskqueue_create(txq->name, M_WAITOK, 932 taskqueue_thread_enqueue, &txq->tq); 933 934 error = taskqueue_start_threads(&txq->tq, 1, PI_NET, 935 "%s txq %d", device_get_nameunit(dev), txq->id); 936 if (error != 0) { 937 device_printf(dev, "failed to start tx taskq %d\n", 938 txq->id); 939 goto fail_start_thread; 940 } 941 942 error = xen_intr_alloc_and_bind_local_port(dev, 943 xenbus_get_otherend_id(dev), /* filter */ NULL, xn_intr, 944 &info->txq[q], INTR_TYPE_NET | INTR_MPSAFE | INTR_ENTROPY, 945 &txq->xen_intr_handle); 946 947 if (error != 0) { 948 device_printf(dev, "xen_intr_alloc_and_bind_local_port failed\n"); 949 goto fail_bind_port; 950 } 951 } 952 953 return (0); 954 955 fail_bind_port: 956 taskqueue_drain_all(txq->tq); 957 fail_start_thread: 958 buf_ring_free(txq->br, M_DEVBUF); 959 taskqueue_free(txq->tq); 960 gnttab_end_foreign_access(txq->ring_ref, NULL); 961 fail_grant_ring: 962 gnttab_free_grant_references(txq->gref_head); 963 free(txq->ring.sring, M_DEVBUF); 964 fail: 965 for (; q >= 0; q--) { 966 disconnect_txq(&info->txq[q]); 967 destroy_txq(&info->txq[q]); 968 } 969 970 free(info->txq, M_DEVBUF); 971 return (error); 972 } 973 974 static int 975 setup_device(device_t dev, struct netfront_info *info, 976 unsigned long num_queues) 977 { 978 int error; 979 int q; 980 981 if (info->txq) 982 destroy_txqs(info); 983 984 if (info->rxq) 985 destroy_rxqs(info); 986 987 info->num_queues = 0; 988 989 error = setup_rxqs(dev, info, num_queues); 990 if (error != 0) 991 goto out; 992 error = setup_txqs(dev, info, num_queues); 993 if (error != 0) 994 goto out; 995 996 info->num_queues = num_queues; 997 998 /* No split event channel at the moment. */ 999 for (q = 0; q < num_queues; q++) 1000 info->rxq[q].xen_intr_handle = info->txq[q].xen_intr_handle; 1001 1002 return (0); 1003 1004 out: 1005 KASSERT(error != 0, ("Error path taken without providing an error code")); 1006 return (error); 1007 } 1008 1009 #ifdef INET 1010 /** 1011 * If this interface has an ipv4 address, send an arp for it. This 1012 * helps to get the network going again after migrating hosts. 1013 */ 1014 static void 1015 netfront_send_fake_arp(device_t dev, struct netfront_info *info) 1016 { 1017 struct ifnet *ifp; 1018 struct ifaddr *ifa; 1019 1020 ifp = info->xn_ifp; 1021 CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { 1022 if (ifa->ifa_addr->sa_family == AF_INET) { 1023 arp_ifinit(ifp, ifa); 1024 } 1025 } 1026 } 1027 #endif 1028 1029 /** 1030 * Callback received when the backend's state changes. 
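 *
 * XenbusStateInitWait triggers the frontend connect path (xn_connect());
 * Closing and Closed handle backend-initiated teardown as well as the
 * feature-renegotiation reset requested from xn_ioctl().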
1031 */ 1032 static void 1033 netfront_backend_changed(device_t dev, XenbusState newstate) 1034 { 1035 struct netfront_info *sc = device_get_softc(dev); 1036 1037 DPRINTK("newstate=%d\n", newstate); 1038 1039 CURVNET_SET(sc->xn_ifp->if_vnet); 1040 1041 switch (newstate) { 1042 case XenbusStateInitialising: 1043 case XenbusStateInitialised: 1044 case XenbusStateUnknown: 1045 case XenbusStateReconfigured: 1046 case XenbusStateReconfiguring: 1047 break; 1048 case XenbusStateInitWait: 1049 if (xenbus_get_state(dev) != XenbusStateInitialising) 1050 break; 1051 if (xn_connect(sc) != 0) 1052 break; 1053 /* Switch to connected state before kicking the rings. */ 1054 xenbus_set_state(sc->xbdev, XenbusStateConnected); 1055 xn_kick_rings(sc); 1056 break; 1057 case XenbusStateClosing: 1058 xenbus_set_state(dev, XenbusStateClosed); 1059 break; 1060 case XenbusStateClosed: 1061 if (sc->xn_reset) { 1062 netif_disconnect_backend(sc); 1063 xenbus_set_state(dev, XenbusStateInitialising); 1064 sc->xn_reset = false; 1065 } 1066 break; 1067 case XenbusStateConnected: 1068 #ifdef INET 1069 netfront_send_fake_arp(dev, sc); 1070 #endif 1071 break; 1072 } 1073 1074 CURVNET_RESTORE(); 1075 } 1076 1077 /** 1078 * \brief Verify that there is sufficient space in the Tx ring 1079 * buffer for a maximally sized request to be enqueued. 1080 * 1081 * A transmit request requires a transmit descriptor for each packet 1082 * fragment, plus up to 2 entries for "options" (e.g. TSO). 1083 */ 1084 static inline int 1085 xn_tx_slot_available(struct netfront_txq *txq) 1086 { 1087 1088 return (RING_FREE_REQUESTS(&txq->ring) > (MAX_TX_REQ_FRAGS + 2)); 1089 } 1090 1091 static void 1092 xn_release_tx_bufs(struct netfront_txq *txq) 1093 { 1094 int i; 1095 1096 for (i = 1; i <= NET_TX_RING_SIZE; i++) { 1097 struct mbuf *m; 1098 1099 m = txq->mbufs[i]; 1100 1101 /* 1102 * We assume that no kernel addresses are 1103 * less than NET_TX_RING_SIZE. Any entry 1104 * in the table that is below this number 1105 * must be an index from free-list tracking. 
		 */
		if (((uintptr_t)m) <= NET_TX_RING_SIZE)
			continue;
		gnttab_end_foreign_access_ref(txq->grant_ref[i]);
		gnttab_release_grant_reference(&txq->gref_head,
		    txq->grant_ref[i]);
		txq->grant_ref[i] = GRANT_REF_INVALID;
		add_id_to_freelist(txq->mbufs, i);
		txq->mbufs_cnt--;
		if (txq->mbufs_cnt < 0) {
			panic("%s: tx_chain_cnt must be >= 0", __func__);
		}
		mbuf_release(m);
	}
}

static struct mbuf *
xn_alloc_one_rx_buffer(struct netfront_rxq *rxq)
{
	struct mbuf *m;

	m = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, MJUMPAGESIZE);
	if (m == NULL)
		return NULL;
	m->m_len = m->m_pkthdr.len = MJUMPAGESIZE;

	return (m);
}

static void
xn_alloc_rx_buffers(struct netfront_rxq *rxq)
{
	RING_IDX req_prod;
	int notify;

	XN_RX_LOCK_ASSERT(rxq);

	if (__predict_false(rxq->info->carrier == 0))
		return;

	for (req_prod = rxq->ring.req_prod_pvt;
	     req_prod - rxq->ring.rsp_cons < NET_RX_RING_SIZE;
	     req_prod++) {
		struct mbuf *m;
		unsigned short id;
		grant_ref_t ref;
		struct netif_rx_request *req;
		unsigned long pfn;

		m = xn_alloc_one_rx_buffer(rxq);
		if (m == NULL)
			break;

		id = xn_rxidx(req_prod);

		KASSERT(rxq->mbufs[id] == NULL, ("non-NULL xn_rx_chain"));
		rxq->mbufs[id] = m;

		ref = gnttab_claim_grant_reference(&rxq->gref_head);
		KASSERT(ref != GNTTAB_LIST_END,
		    ("reserved grant references exhausted"));
		rxq->grant_ref[id] = ref;

		pfn = atop(vtophys(mtod(m, vm_offset_t)));
		req = RING_GET_REQUEST(&rxq->ring, req_prod);

		gnttab_grant_foreign_access_ref(ref,
		    xenbus_get_otherend_id(rxq->info->xbdev), pfn, 0);
		req->id = id;
		req->gref = ref;
	}

	rxq->ring.req_prod_pvt = req_prod;

	/* Not enough requests? Try again later. */
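	/*
	 * The rx_refill callout below simply retries this routine, with the
	 * RX lock held, a tenth of a second later; see
	 * xn_alloc_rx_buffers_callout().
	 */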
	if (req_prod - rxq->ring.rsp_cons < NET_RX_SLOTS_MIN) {
		callout_reset_curcpu(&rxq->rx_refill, hz/10,
		    xn_alloc_rx_buffers_callout, rxq);
		return;
	}

	wmb();		/* barrier so backend sees requests */

	RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&rxq->ring, notify);
	if (notify)
		xen_intr_signal(rxq->xen_intr_handle);
}

static void
xn_alloc_rx_buffers_callout(void *arg)
{
	struct netfront_rxq *rxq;

	rxq = (struct netfront_rxq *)arg;
	XN_RX_LOCK(rxq);
	xn_alloc_rx_buffers(rxq);
	XN_RX_UNLOCK(rxq);
}

static void
xn_release_rx_bufs(struct netfront_rxq *rxq)
{
	int i, ref;
	struct mbuf *m;

	for (i = 0; i < NET_RX_RING_SIZE; i++) {
		m = rxq->mbufs[i];

		if (m == NULL)
			continue;

		ref = rxq->grant_ref[i];
		if (ref == GRANT_REF_INVALID)
			continue;

		gnttab_end_foreign_access_ref(ref);
		gnttab_release_grant_reference(&rxq->gref_head, ref);
		rxq->mbufs[i] = NULL;
		rxq->grant_ref[i] = GRANT_REF_INVALID;
		m_freem(m);
	}
}

static void
xn_rxeof(struct netfront_rxq *rxq)
{
	struct ifnet *ifp;
	struct netfront_info *np = rxq->info;
#if (defined(INET) || defined(INET6))
	struct lro_ctrl *lro = &rxq->lro;
#endif
	struct netfront_rx_info rinfo;
	struct netif_rx_response *rx = &rinfo.rx;
	struct netif_extra_info *extras = rinfo.extras;
	RING_IDX i, rp;
	struct mbuf *m;
	struct mbufq mbufq_rxq, mbufq_errq;
	int err, work_to_do;

	XN_RX_LOCK_ASSERT(rxq);

	if (!netfront_carrier_ok(np))
		return;

	/* XXX: there should be some sane limit. */
	mbufq_init(&mbufq_errq, INT_MAX);
	mbufq_init(&mbufq_rxq, INT_MAX);

	ifp = np->xn_ifp;

	do {
		rp = rxq->ring.sring->rsp_prod;
		rmb();	/* Ensure we see queued responses up to 'rp'. */

		i = rxq->ring.rsp_cons;
		while ((i != rp)) {
			memcpy(rx, RING_GET_RESPONSE(&rxq->ring, i),
			    sizeof(*rx));
			memset(extras, 0, sizeof(rinfo.extras));

			m = NULL;
			err = xn_get_responses(rxq, &rinfo, rp, &i, &m);

			if (__predict_false(err)) {
				if (m)
					(void)mbufq_enqueue(&mbufq_errq, m);
				if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1);
				continue;
			}

			m->m_pkthdr.rcvif = ifp;
			if (rx->flags & NETRXF_data_validated) {
				/*
				 * According to mbuf(9) the correct way to tell
				 * the stack that the checksum of an inbound
				 * packet is correct, without it actually being
				 * present (because the underlying interface
				 * doesn't provide it), is to set the
				 * CSUM_DATA_VALID and CSUM_PSEUDO_HDR flags,
				 * and the csum_data field to 0xffff.
				 */
				m->m_pkthdr.csum_flags |= (CSUM_DATA_VALID
				    | CSUM_PSEUDO_HDR);
				m->m_pkthdr.csum_data = 0xffff;
			}
			if ((rx->flags & NETRXF_extra_info) != 0 &&
			    (extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].type ==
			    XEN_NETIF_EXTRA_TYPE_GSO)) {
				m->m_pkthdr.tso_segsz =
				    extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].u.gso.size;
				m->m_pkthdr.csum_flags |= CSUM_TSO;
			}

			(void)mbufq_enqueue(&mbufq_rxq, m);
		}

		rxq->ring.rsp_cons = i;

		xn_alloc_rx_buffers(rxq);

		RING_FINAL_CHECK_FOR_RESPONSES(&rxq->ring, work_to_do);
	} while (work_to_do);

	mbufq_drain(&mbufq_errq);
	/*
	 * Process all the mbufs after the remapping is complete.
	 * Break the mbuf chain first though.
1311 */ 1312 while ((m = mbufq_dequeue(&mbufq_rxq)) != NULL) { 1313 if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); 1314 #if (defined(INET) || defined(INET6)) 1315 /* Use LRO if possible */ 1316 if ((ifp->if_capenable & IFCAP_LRO) == 0 || 1317 lro->lro_cnt == 0 || tcp_lro_rx(lro, m, 0)) { 1318 /* 1319 * If LRO fails, pass up to the stack 1320 * directly. 1321 */ 1322 (*ifp->if_input)(ifp, m); 1323 } 1324 #else 1325 (*ifp->if_input)(ifp, m); 1326 #endif 1327 } 1328 1329 #if (defined(INET) || defined(INET6)) 1330 /* 1331 * Flush any outstanding LRO work 1332 */ 1333 tcp_lro_flush_all(lro); 1334 #endif 1335 } 1336 1337 static void 1338 xn_txeof(struct netfront_txq *txq) 1339 { 1340 RING_IDX i, prod; 1341 unsigned short id; 1342 struct ifnet *ifp; 1343 netif_tx_response_t *txr; 1344 struct mbuf *m; 1345 struct netfront_info *np = txq->info; 1346 1347 XN_TX_LOCK_ASSERT(txq); 1348 1349 if (!netfront_carrier_ok(np)) 1350 return; 1351 1352 ifp = np->xn_ifp; 1353 1354 do { 1355 prod = txq->ring.sring->rsp_prod; 1356 rmb(); /* Ensure we see responses up to 'rp'. */ 1357 1358 for (i = txq->ring.rsp_cons; i != prod; i++) { 1359 txr = RING_GET_RESPONSE(&txq->ring, i); 1360 if (txr->status == NETIF_RSP_NULL) 1361 continue; 1362 1363 if (txr->status != NETIF_RSP_OKAY) { 1364 printf("%s: WARNING: response is %d!\n", 1365 __func__, txr->status); 1366 } 1367 id = txr->id; 1368 m = txq->mbufs[id]; 1369 KASSERT(m != NULL, ("mbuf not found in chain")); 1370 KASSERT((uintptr_t)m > NET_TX_RING_SIZE, 1371 ("mbuf already on the free list, but we're " 1372 "trying to free it again!")); 1373 M_ASSERTVALID(m); 1374 1375 if (__predict_false(gnttab_query_foreign_access( 1376 txq->grant_ref[id]) != 0)) { 1377 panic("%s: grant id %u still in use by the " 1378 "backend", __func__, id); 1379 } 1380 gnttab_end_foreign_access_ref(txq->grant_ref[id]); 1381 gnttab_release_grant_reference( 1382 &txq->gref_head, txq->grant_ref[id]); 1383 txq->grant_ref[id] = GRANT_REF_INVALID; 1384 1385 txq->mbufs[id] = NULL; 1386 add_id_to_freelist(txq->mbufs, id); 1387 txq->mbufs_cnt--; 1388 mbuf_release(m); 1389 /* Only mark the txq active if we've freed up at least one slot to try */ 1390 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; 1391 } 1392 txq->ring.rsp_cons = prod; 1393 1394 /* 1395 * Set a new event, then check for race with update of 1396 * tx_cons. Note that it is essential to schedule a 1397 * callback, no matter how few buffers are pending. Even if 1398 * there is space in the transmit ring, higher layers may 1399 * be blocked because too much data is outstanding: in such 1400 * cases notification from Xen is likely to be the only kick 1401 * that we'll get. 
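		 *
		 * The event index is set roughly halfway between the current
		 * response producer and the request producer, so the backend
		 * re-notifies us once about half of the outstanding requests
		 * have been completed.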
		 */
		txq->ring.sring->rsp_event =
		    prod + ((txq->ring.sring->req_prod - prod) >> 1) + 1;

		mb();
	} while (prod != txq->ring.sring->rsp_prod);

	if (txq->full &&
	    ((txq->ring.sring->req_prod - prod) < NET_TX_RING_SIZE)) {
		txq->full = false;
		xn_txq_start(txq);
	}
}

static void
xn_intr(void *xsc)
{
	struct netfront_txq *txq = xsc;
	struct netfront_info *np = txq->info;
	struct netfront_rxq *rxq = &np->rxq[txq->id];

	/* kick both tx and rx */
	xn_rxq_intr(rxq);
	xn_txq_intr(txq);
}

static void
xn_move_rx_slot(struct netfront_rxq *rxq, struct mbuf *m,
    grant_ref_t ref)
{
	int new = xn_rxidx(rxq->ring.req_prod_pvt);

	KASSERT(rxq->mbufs[new] == NULL, ("mbufs != NULL"));
	rxq->mbufs[new] = m;
	rxq->grant_ref[new] = ref;
	RING_GET_REQUEST(&rxq->ring, rxq->ring.req_prod_pvt)->id = new;
	RING_GET_REQUEST(&rxq->ring, rxq->ring.req_prod_pvt)->gref = ref;
	rxq->ring.req_prod_pvt++;
}

static int
xn_get_extras(struct netfront_rxq *rxq,
    struct netif_extra_info *extras, RING_IDX rp, RING_IDX *cons)
{
	struct netif_extra_info *extra;

	int err = 0;

	do {
		struct mbuf *m;
		grant_ref_t ref;

		if (__predict_false(*cons + 1 == rp)) {
			err = EINVAL;
			break;
		}

		extra = (struct netif_extra_info *)
		    RING_GET_RESPONSE(&rxq->ring, ++(*cons));

		if (__predict_false(!extra->type ||
		    extra->type >= XEN_NETIF_EXTRA_TYPE_MAX)) {
			err = EINVAL;
		} else {
			memcpy(&extras[extra->type - 1], extra, sizeof(*extra));
		}

		m = xn_get_rx_mbuf(rxq, *cons);
		ref = xn_get_rx_ref(rxq, *cons);
		xn_move_rx_slot(rxq, m, ref);
	} while (extra->flags & XEN_NETIF_EXTRA_FLAG_MORE);

	return err;
}

static int
xn_get_responses(struct netfront_rxq *rxq,
    struct netfront_rx_info *rinfo, RING_IDX rp, RING_IDX *cons,
    struct mbuf **list)
{
	struct netif_rx_response *rx = &rinfo->rx;
	struct netif_extra_info *extras = rinfo->extras;
	struct mbuf *m, *m0, *m_prev;
	grant_ref_t ref = xn_get_rx_ref(rxq, *cons);
	int frags = 1;
	int err = 0;
	u_long ret __diagused;

	m0 = m = m_prev = xn_get_rx_mbuf(rxq, *cons);

	if (rx->flags & NETRXF_extra_info) {
		err = xn_get_extras(rxq, extras, rp, cons);
	}

	if (m0 != NULL) {
		m0->m_pkthdr.len = 0;
		m0->m_next = NULL;
	}

	for (;;) {
#if 0
		DPRINTK("rx->status=%hd rx->offset=%hu frags=%u\n",
			rx->status, rx->offset, frags);
#endif
		if (__predict_false(rx->status < 0 ||
		    rx->offset + rx->status > PAGE_SIZE)) {
			xn_move_rx_slot(rxq, m, ref);
			if (m0 == m)
				m0 = NULL;
			m = NULL;
			err = EINVAL;
			goto next_skip_queue;
		}

		/*
		 * This definitely indicates a bug, either in this driver or in
		 * the backend driver.  In future this should flag the bad
		 * situation to the system controller to reboot the backend.
1520 */ 1521 if (ref == GRANT_REF_INVALID) { 1522 printf("%s: Bad rx response id %d.\n", __func__, rx->id); 1523 err = EINVAL; 1524 goto next; 1525 } 1526 1527 ret = gnttab_end_foreign_access_ref(ref); 1528 KASSERT(ret, ("Unable to end access to grant references")); 1529 1530 gnttab_release_grant_reference(&rxq->gref_head, ref); 1531 1532 next: 1533 if (m == NULL) 1534 break; 1535 1536 m->m_len = rx->status; 1537 m->m_data += rx->offset; 1538 m0->m_pkthdr.len += rx->status; 1539 1540 next_skip_queue: 1541 if (!(rx->flags & NETRXF_more_data)) 1542 break; 1543 1544 if (*cons + frags == rp) { 1545 if (net_ratelimit()) 1546 WPRINTK("Need more frags\n"); 1547 err = ENOENT; 1548 printf("%s: cons %u frags %u rp %u, not enough frags\n", 1549 __func__, *cons, frags, rp); 1550 break; 1551 } 1552 /* 1553 * Note that m can be NULL, if rx->status < 0 or if 1554 * rx->offset + rx->status > PAGE_SIZE above. 1555 */ 1556 m_prev = m; 1557 1558 rx = RING_GET_RESPONSE(&rxq->ring, *cons + frags); 1559 m = xn_get_rx_mbuf(rxq, *cons + frags); 1560 1561 /* 1562 * m_prev == NULL can happen if rx->status < 0 or if 1563 * rx->offset + * rx->status > PAGE_SIZE above. 1564 */ 1565 if (m_prev != NULL) 1566 m_prev->m_next = m; 1567 1568 /* 1569 * m0 can be NULL if rx->status < 0 or if * rx->offset + 1570 * rx->status > PAGE_SIZE above. 1571 */ 1572 if (m0 == NULL) 1573 m0 = m; 1574 m->m_next = NULL; 1575 ref = xn_get_rx_ref(rxq, *cons + frags); 1576 frags++; 1577 } 1578 *list = m0; 1579 *cons += frags; 1580 1581 return (err); 1582 } 1583 1584 /** 1585 * Given an mbuf chain, make sure we have enough room and then push 1586 * it onto the transmit ring. 1587 */ 1588 static int 1589 xn_assemble_tx_request(struct netfront_txq *txq, struct mbuf *m_head) 1590 { 1591 struct netfront_info *np = txq->info; 1592 struct ifnet *ifp = np->xn_ifp; 1593 int otherend_id, error, nfrags; 1594 bus_dma_segment_t *segs = txq->segs; 1595 struct mbuf_xennet *tag; 1596 bus_dmamap_t map; 1597 unsigned int i; 1598 1599 KASSERT(!SLIST_EMPTY(&txq->tags), ("no tags available")); 1600 tag = SLIST_FIRST(&txq->tags); 1601 SLIST_REMOVE_HEAD(&txq->tags, next); 1602 KASSERT(tag->count == 0, ("tag already in-use")); 1603 map = tag->dma_map; 1604 error = bus_dmamap_load_mbuf_sg(np->dma_tag, map, m_head, segs, 1605 &nfrags, 0); 1606 if (error == EFBIG || nfrags > np->maxfrags) { 1607 struct mbuf *m; 1608 1609 bus_dmamap_unload(np->dma_tag, map); 1610 m = m_defrag(m_head, M_NOWAIT); 1611 if (!m) { 1612 /* 1613 * Defrag failed, so free the mbuf and 1614 * therefore drop the packet. 1615 */ 1616 SLIST_INSERT_HEAD(&txq->tags, tag, next); 1617 m_freem(m_head); 1618 return (EMSGSIZE); 1619 } 1620 m_head = m; 1621 error = bus_dmamap_load_mbuf_sg(np->dma_tag, map, m_head, segs, 1622 &nfrags, 0); 1623 if (error != 0 || nfrags > np->maxfrags) { 1624 bus_dmamap_unload(np->dma_tag, map); 1625 SLIST_INSERT_HEAD(&txq->tags, tag, next); 1626 m_freem(m_head); 1627 return (error ?: EFBIG); 1628 } 1629 } else if (error != 0) { 1630 SLIST_INSERT_HEAD(&txq->tags, tag, next); 1631 m_freem(m_head); 1632 return (error); 1633 } 1634 1635 /** 1636 * The FreeBSD TCP stack, with TSO enabled, can produce a chain 1637 * of mbufs longer than Linux can handle. Make sure we don't 1638 * pass a too-long chain over to the other side by dropping the 1639 * packet. It doesn't look like there is currently a way to 1640 * tell the TCP stack to generate a shorter chain of packets. 
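	 *
	 * (MAX_TX_REQ_FRAGS is sized to match the Linux MAX_SKB_FRAGS limit;
	 * see its definition near the top of this file.)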
1641 */ 1642 if (nfrags > MAX_TX_REQ_FRAGS) { 1643 #ifdef DEBUG 1644 printf("%s: nfrags %d > MAX_TX_REQ_FRAGS %d, netback " 1645 "won't be able to handle it, dropping\n", 1646 __func__, nfrags, MAX_TX_REQ_FRAGS); 1647 #endif 1648 SLIST_INSERT_HEAD(&txq->tags, tag, next); 1649 bus_dmamap_unload(np->dma_tag, map); 1650 m_freem(m_head); 1651 return (EMSGSIZE); 1652 } 1653 1654 /* 1655 * This check should be redundant. We've already verified that we 1656 * have enough slots in the ring to handle a packet of maximum 1657 * size, and that our packet is less than the maximum size. Keep 1658 * it in here as an assert for now just to make certain that 1659 * chain_cnt is accurate. 1660 */ 1661 KASSERT((txq->mbufs_cnt + nfrags) <= NET_TX_RING_SIZE, 1662 ("%s: chain_cnt (%d) + nfrags (%d) > NET_TX_RING_SIZE " 1663 "(%d)!", __func__, (int) txq->mbufs_cnt, 1664 (int) nfrags, (int) NET_TX_RING_SIZE)); 1665 1666 /* 1667 * Start packing the mbufs in this chain into 1668 * the fragment pointers. Stop when we run out 1669 * of fragments or hit the end of the mbuf chain. 1670 */ 1671 otherend_id = xenbus_get_otherend_id(np->xbdev); 1672 m_tag_prepend(m_head, &tag->tag); 1673 for (i = 0; i < nfrags; i++) { 1674 netif_tx_request_t *tx; 1675 uintptr_t id; 1676 grant_ref_t ref; 1677 u_long mfn; /* XXX Wrong type? */ 1678 1679 tx = RING_GET_REQUEST(&txq->ring, txq->ring.req_prod_pvt); 1680 id = get_id_from_freelist(txq->mbufs); 1681 if (id == 0) 1682 panic("%s: was allocated the freelist head!\n", 1683 __func__); 1684 txq->mbufs_cnt++; 1685 if (txq->mbufs_cnt > NET_TX_RING_SIZE) 1686 panic("%s: tx_chain_cnt must be <= NET_TX_RING_SIZE\n", 1687 __func__); 1688 mbuf_grab(m_head); 1689 txq->mbufs[id] = m_head; 1690 tx->id = id; 1691 ref = gnttab_claim_grant_reference(&txq->gref_head); 1692 KASSERT((short)ref >= 0, ("Negative ref")); 1693 mfn = atop(segs[i].ds_addr); 1694 gnttab_grant_foreign_access_ref(ref, otherend_id, 1695 mfn, GNTMAP_readonly); 1696 tx->gref = txq->grant_ref[id] = ref; 1697 tx->offset = segs[i].ds_addr & PAGE_MASK; 1698 KASSERT(tx->offset + segs[i].ds_len <= PAGE_SIZE, 1699 ("mbuf segment crosses a page boundary")); 1700 tx->flags = 0; 1701 if (i == 0) { 1702 /* 1703 * The first fragment has the entire packet 1704 * size, subsequent fragments have just the 1705 * fragment size. The backend works out the 1706 * true size of the first fragment by 1707 * subtracting the sizes of the other 1708 * fragments. 1709 */ 1710 tx->size = m_head->m_pkthdr.len; 1711 1712 /* 1713 * The first fragment contains the checksum flags 1714 * and is optionally followed by extra data for 1715 * TSO etc. 1716 */ 1717 /** 1718 * CSUM_TSO requires checksum offloading. 1719 * Some versions of FreeBSD fail to 1720 * set CSUM_TCP in the CSUM_TSO case, 1721 * so we have to test for CSUM_TSO 1722 * explicitly. 
1723 */ 1724 if (m_head->m_pkthdr.csum_flags 1725 & (CSUM_DELAY_DATA | CSUM_TSO)) { 1726 tx->flags |= (NETTXF_csum_blank 1727 | NETTXF_data_validated); 1728 } 1729 if (m_head->m_pkthdr.csum_flags & CSUM_TSO) { 1730 struct netif_extra_info *gso = 1731 (struct netif_extra_info *) 1732 RING_GET_REQUEST(&txq->ring, 1733 ++txq->ring.req_prod_pvt); 1734 1735 tx->flags |= NETTXF_extra_info; 1736 1737 gso->u.gso.size = m_head->m_pkthdr.tso_segsz; 1738 gso->u.gso.type = 1739 XEN_NETIF_GSO_TYPE_TCPV4; 1740 gso->u.gso.pad = 0; 1741 gso->u.gso.features = 0; 1742 1743 gso->type = XEN_NETIF_EXTRA_TYPE_GSO; 1744 gso->flags = 0; 1745 } 1746 } else { 1747 tx->size = segs[i].ds_len; 1748 } 1749 if (i != nfrags - 1) 1750 tx->flags |= NETTXF_more_data; 1751 1752 txq->ring.req_prod_pvt++; 1753 } 1754 bus_dmamap_sync(np->dma_tag, map, BUS_DMASYNC_PREWRITE); 1755 BPF_MTAP(ifp, m_head); 1756 1757 if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); 1758 if_inc_counter(ifp, IFCOUNTER_OBYTES, m_head->m_pkthdr.len); 1759 if (m_head->m_flags & M_MCAST) 1760 if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1); 1761 1762 xn_txeof(txq); 1763 1764 return (0); 1765 } 1766 1767 /* equivalent of network_open() in Linux */ 1768 static void 1769 xn_ifinit_locked(struct netfront_info *np) 1770 { 1771 struct ifnet *ifp; 1772 int i; 1773 struct netfront_rxq *rxq; 1774 1775 XN_LOCK_ASSERT(np); 1776 1777 ifp = np->xn_ifp; 1778 1779 if (ifp->if_drv_flags & IFF_DRV_RUNNING || !netfront_carrier_ok(np)) 1780 return; 1781 1782 xn_stop(np); 1783 1784 for (i = 0; i < np->num_queues; i++) { 1785 rxq = &np->rxq[i]; 1786 XN_RX_LOCK(rxq); 1787 xn_alloc_rx_buffers(rxq); 1788 rxq->ring.sring->rsp_event = rxq->ring.rsp_cons + 1; 1789 if (RING_HAS_UNCONSUMED_RESPONSES(&rxq->ring)) 1790 xn_rxeof(rxq); 1791 XN_RX_UNLOCK(rxq); 1792 } 1793 1794 ifp->if_drv_flags |= IFF_DRV_RUNNING; 1795 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; 1796 if_link_state_change(ifp, LINK_STATE_UP); 1797 } 1798 1799 static void 1800 xn_ifinit(void *xsc) 1801 { 1802 struct netfront_info *sc = xsc; 1803 1804 XN_LOCK(sc); 1805 xn_ifinit_locked(sc); 1806 XN_UNLOCK(sc); 1807 } 1808 1809 static int 1810 xn_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) 1811 { 1812 struct netfront_info *sc = ifp->if_softc; 1813 struct ifreq *ifr = (struct ifreq *) data; 1814 device_t dev; 1815 #ifdef INET 1816 struct ifaddr *ifa = (struct ifaddr *)data; 1817 #endif 1818 int mask, error = 0, reinit; 1819 1820 dev = sc->xbdev; 1821 1822 switch(cmd) { 1823 case SIOCSIFADDR: 1824 #ifdef INET 1825 XN_LOCK(sc); 1826 if (ifa->ifa_addr->sa_family == AF_INET) { 1827 ifp->if_flags |= IFF_UP; 1828 if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) 1829 xn_ifinit_locked(sc); 1830 arp_ifinit(ifp, ifa); 1831 XN_UNLOCK(sc); 1832 } else { 1833 XN_UNLOCK(sc); 1834 #endif 1835 error = ether_ioctl(ifp, cmd, data); 1836 #ifdef INET 1837 } 1838 #endif 1839 break; 1840 case SIOCSIFMTU: 1841 if (ifp->if_mtu == ifr->ifr_mtu) 1842 break; 1843 1844 ifp->if_mtu = ifr->ifr_mtu; 1845 ifp->if_drv_flags &= ~IFF_DRV_RUNNING; 1846 xn_ifinit(sc); 1847 break; 1848 case SIOCSIFFLAGS: 1849 XN_LOCK(sc); 1850 if (ifp->if_flags & IFF_UP) { 1851 /* 1852 * If only the state of the PROMISC flag changed, 1853 * then just use the 'set promisc mode' command 1854 * instead of reinitializing the entire NIC. Doing 1855 * a full re-init means reloading the firmware and 1856 * waiting for it to start up, which may take a 1857 * second or two. 
1858 */ 1859 xn_ifinit_locked(sc); 1860 } else { 1861 if (ifp->if_drv_flags & IFF_DRV_RUNNING) { 1862 xn_stop(sc); 1863 } 1864 } 1865 sc->xn_if_flags = ifp->if_flags; 1866 XN_UNLOCK(sc); 1867 break; 1868 case SIOCSIFCAP: 1869 mask = ifr->ifr_reqcap ^ ifp->if_capenable; 1870 reinit = 0; 1871 1872 if (mask & IFCAP_TXCSUM) { 1873 ifp->if_capenable ^= IFCAP_TXCSUM; 1874 ifp->if_hwassist ^= XN_CSUM_FEATURES; 1875 } 1876 if (mask & IFCAP_TSO4) { 1877 ifp->if_capenable ^= IFCAP_TSO4; 1878 ifp->if_hwassist ^= CSUM_TSO; 1879 } 1880 1881 if (mask & (IFCAP_RXCSUM | IFCAP_LRO)) { 1882 /* These Rx features require us to renegotiate. */ 1883 reinit = 1; 1884 1885 if (mask & IFCAP_RXCSUM) 1886 ifp->if_capenable ^= IFCAP_RXCSUM; 1887 if (mask & IFCAP_LRO) 1888 ifp->if_capenable ^= IFCAP_LRO; 1889 } 1890 1891 if (reinit == 0) 1892 break; 1893 1894 /* 1895 * We must reset the interface so the backend picks up the 1896 * new features. 1897 */ 1898 device_printf(sc->xbdev, 1899 "performing interface reset due to feature change\n"); 1900 XN_LOCK(sc); 1901 netfront_carrier_off(sc); 1902 sc->xn_reset = true; 1903 /* 1904 * NB: the pending packet queue is not flushed, since 1905 * the interface should still support the old options. 1906 */ 1907 XN_UNLOCK(sc); 1908 /* 1909 * Delete the xenstore nodes that export features. 1910 * 1911 * NB: There's a xenbus state called 1912 * "XenbusStateReconfiguring", which is what we should set 1913 * here. Sadly none of the backends know how to handle it, 1914 * and simply disconnect from the frontend, so we will just 1915 * switch back to XenbusStateInitialising in order to force 1916 * a reconnection. 1917 */ 1918 xs_rm(XST_NIL, xenbus_get_node(dev), "feature-gso-tcpv4"); 1919 xs_rm(XST_NIL, xenbus_get_node(dev), "feature-no-csum-offload"); 1920 xenbus_set_state(dev, XenbusStateClosing); 1921 1922 /* 1923 * Wait for the frontend to reconnect before returning 1924 * from the ioctl. 30s should be more than enough for any 1925 * sane backend to reconnect. 
1926 */ 1927 error = tsleep(sc, 0, "xn_rst", 30*hz); 1928 break; 1929 case SIOCADDMULTI: 1930 case SIOCDELMULTI: 1931 break; 1932 case SIOCSIFMEDIA: 1933 case SIOCGIFMEDIA: 1934 error = ifmedia_ioctl(ifp, ifr, &sc->sc_media, cmd); 1935 break; 1936 default: 1937 error = ether_ioctl(ifp, cmd, data); 1938 } 1939 1940 return (error); 1941 } 1942 1943 static void 1944 xn_stop(struct netfront_info *sc) 1945 { 1946 struct ifnet *ifp; 1947 1948 XN_LOCK_ASSERT(sc); 1949 1950 ifp = sc->xn_ifp; 1951 1952 ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); 1953 if_link_state_change(ifp, LINK_STATE_DOWN); 1954 } 1955 1956 static void 1957 xn_rebuild_rx_bufs(struct netfront_rxq *rxq) 1958 { 1959 int requeue_idx, i; 1960 grant_ref_t ref; 1961 netif_rx_request_t *req; 1962 1963 for (requeue_idx = 0, i = 0; i < NET_RX_RING_SIZE; i++) { 1964 struct mbuf *m; 1965 u_long pfn; 1966 1967 if (rxq->mbufs[i] == NULL) 1968 continue; 1969 1970 m = rxq->mbufs[requeue_idx] = xn_get_rx_mbuf(rxq, i); 1971 ref = rxq->grant_ref[requeue_idx] = xn_get_rx_ref(rxq, i); 1972 1973 req = RING_GET_REQUEST(&rxq->ring, requeue_idx); 1974 pfn = vtophys(mtod(m, vm_offset_t)) >> PAGE_SHIFT; 1975 1976 gnttab_grant_foreign_access_ref(ref, 1977 xenbus_get_otherend_id(rxq->info->xbdev), 1978 pfn, 0); 1979 1980 req->gref = ref; 1981 req->id = requeue_idx; 1982 1983 requeue_idx++; 1984 } 1985 1986 rxq->ring.req_prod_pvt = requeue_idx; 1987 } 1988 1989 /* START of Xenolinux helper functions adapted to FreeBSD */ 1990 static int 1991 xn_connect(struct netfront_info *np) 1992 { 1993 int i, error; 1994 u_int feature_rx_copy; 1995 struct netfront_rxq *rxq; 1996 struct netfront_txq *txq; 1997 1998 error = xs_scanf(XST_NIL, xenbus_get_otherend_path(np->xbdev), 1999 "feature-rx-copy", NULL, "%u", &feature_rx_copy); 2000 if (error != 0) 2001 feature_rx_copy = 0; 2002 2003 /* We only support rx copy. */ 2004 if (!feature_rx_copy) 2005 return (EPROTONOSUPPORT); 2006 2007 /* Recovery procedure: */ 2008 error = talk_to_backend(np->xbdev, np); 2009 if (error != 0) 2010 return (error); 2011 2012 /* Step 1: Reinitialise variables. */ 2013 xn_query_features(np); 2014 xn_configure_features(np); 2015 2016 /* Step 2: Release TX buffer */ 2017 for (i = 0; i < np->num_queues; i++) { 2018 txq = &np->txq[i]; 2019 xn_release_tx_bufs(txq); 2020 } 2021 2022 /* Step 3: Rebuild the RX buffer freelist and the RX ring itself. */ 2023 for (i = 0; i < np->num_queues; i++) { 2024 rxq = &np->rxq[i]; 2025 xn_rebuild_rx_bufs(rxq); 2026 } 2027 2028 /* Step 4: All public and private state should now be sane. Get 2029 * ready to start sending and receiving packets and give the driver 2030 * domain a kick because we've probably just requeued some 2031 * packets. 
2032 */ 2033 netfront_carrier_on(np); 2034 wakeup(np); 2035 2036 return (0); 2037 } 2038 2039 static void 2040 xn_kick_rings(struct netfront_info *np) 2041 { 2042 struct netfront_rxq *rxq; 2043 struct netfront_txq *txq; 2044 int i; 2045 2046 for (i = 0; i < np->num_queues; i++) { 2047 txq = &np->txq[i]; 2048 rxq = &np->rxq[i]; 2049 xen_intr_signal(txq->xen_intr_handle); 2050 XN_TX_LOCK(txq); 2051 xn_txeof(txq); 2052 XN_TX_UNLOCK(txq); 2053 XN_RX_LOCK(rxq); 2054 xn_alloc_rx_buffers(rxq); 2055 XN_RX_UNLOCK(rxq); 2056 } 2057 } 2058 2059 static void 2060 xn_query_features(struct netfront_info *np) 2061 { 2062 int val; 2063 2064 device_printf(np->xbdev, "backend features:"); 2065 2066 if (xs_scanf(XST_NIL, xenbus_get_otherend_path(np->xbdev), 2067 "feature-sg", NULL, "%d", &val) != 0) 2068 val = 0; 2069 2070 np->maxfrags = 1; 2071 if (val) { 2072 np->maxfrags = MAX_TX_REQ_FRAGS; 2073 printf(" feature-sg"); 2074 } 2075 2076 if (xs_scanf(XST_NIL, xenbus_get_otherend_path(np->xbdev), 2077 "feature-gso-tcpv4", NULL, "%d", &val) != 0) 2078 val = 0; 2079 2080 np->xn_ifp->if_capabilities &= ~(IFCAP_TSO4|IFCAP_LRO); 2081 if (val) { 2082 np->xn_ifp->if_capabilities |= IFCAP_TSO4|IFCAP_LRO; 2083 printf(" feature-gso-tcp4"); 2084 } 2085 2086 /* 2087 * HW CSUM offload is assumed to be available unless 2088 * feature-no-csum-offload is set in xenstore. 2089 */ 2090 if (xs_scanf(XST_NIL, xenbus_get_otherend_path(np->xbdev), 2091 "feature-no-csum-offload", NULL, "%d", &val) != 0) 2092 val = 0; 2093 2094 np->xn_ifp->if_capabilities |= IFCAP_HWCSUM; 2095 if (val) { 2096 np->xn_ifp->if_capabilities &= ~(IFCAP_HWCSUM); 2097 printf(" feature-no-csum-offload"); 2098 } 2099 2100 printf("\n"); 2101 } 2102 2103 static int 2104 xn_configure_features(struct netfront_info *np) 2105 { 2106 int err, cap_enabled; 2107 #if (defined(INET) || defined(INET6)) 2108 int i; 2109 #endif 2110 struct ifnet *ifp; 2111 2112 ifp = np->xn_ifp; 2113 err = 0; 2114 2115 if ((ifp->if_capenable & ifp->if_capabilities) == ifp->if_capenable) { 2116 /* Current options are available, no need to do anything. */ 2117 return (0); 2118 } 2119 2120 /* Try to preserve as many options as possible. 
static int
xn_configure_features(struct netfront_info *np)
{
	int err, cap_enabled;
#if (defined(INET) || defined(INET6))
	int i;
#endif
	struct ifnet *ifp;

	ifp = np->xn_ifp;
	err = 0;

	if ((ifp->if_capenable & ifp->if_capabilities) == ifp->if_capenable) {
		/* Current options are available, no need to do anything. */
		return (0);
	}

	/* Try to preserve as many options as possible. */
	cap_enabled = ifp->if_capenable;
	ifp->if_capenable = ifp->if_hwassist = 0;

#if (defined(INET) || defined(INET6))
	if ((cap_enabled & IFCAP_LRO) != 0)
		for (i = 0; i < np->num_queues; i++)
			tcp_lro_free(&np->rxq[i].lro);
	if (xn_enable_lro &&
	    (ifp->if_capabilities & cap_enabled & IFCAP_LRO) != 0) {
		ifp->if_capenable |= IFCAP_LRO;
		for (i = 0; i < np->num_queues; i++) {
			err = tcp_lro_init(&np->rxq[i].lro);
			if (err != 0) {
				device_printf(np->xbdev,
				    "LRO initialization failed\n");
				ifp->if_capenable &= ~IFCAP_LRO;
				break;
			}
			np->rxq[i].lro.ifp = ifp;
		}
	}
	if ((ifp->if_capabilities & cap_enabled & IFCAP_TSO4) != 0) {
		ifp->if_capenable |= IFCAP_TSO4;
		ifp->if_hwassist |= CSUM_TSO;
	}
#endif
	if ((ifp->if_capabilities & cap_enabled & IFCAP_TXCSUM) != 0) {
		ifp->if_capenable |= IFCAP_TXCSUM;
		ifp->if_hwassist |= XN_CSUM_FEATURES;
	}
	if ((ifp->if_capabilities & cap_enabled & IFCAP_RXCSUM) != 0)
		ifp->if_capenable |= IFCAP_RXCSUM;

	return (err);
}

static int
xn_txq_mq_start_locked(struct netfront_txq *txq, struct mbuf *m)
{
	struct netfront_info *np;
	struct ifnet *ifp;
	struct buf_ring *br;
	int error, notify;

	np = txq->info;
	br = txq->br;
	ifp = np->xn_ifp;
	error = 0;

	XN_TX_LOCK_ASSERT(txq);

	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
	    !netfront_carrier_ok(np)) {
		if (m != NULL)
			error = drbr_enqueue(ifp, br, m);
		return (error);
	}

	if (m != NULL) {
		error = drbr_enqueue(ifp, br, m);
		if (error != 0)
			return (error);
	}

	while ((m = drbr_peek(ifp, br)) != NULL) {
		if (!xn_tx_slot_available(txq)) {
			drbr_putback(ifp, br, m);
			break;
		}

		error = xn_assemble_tx_request(txq, m);
		/* xn_assemble_tx_request always consumes the mbuf. */
		if (error != 0) {
			drbr_advance(ifp, br);
			break;
		}

		RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&txq->ring, notify);
		if (notify)
			xen_intr_signal(txq->xen_intr_handle);

		drbr_advance(ifp, br);
	}

	if (RING_FULL(&txq->ring))
		txq->full = true;

	return (0);
}

static int
xn_txq_mq_start(struct ifnet *ifp, struct mbuf *m)
{
	struct netfront_info *np;
	struct netfront_txq *txq;
	int i, npairs, error;

	np = ifp->if_softc;
	npairs = np->num_queues;

	if (!netfront_carrier_ok(np))
		return (ENOBUFS);

	KASSERT(npairs != 0, ("called with 0 available queues"));

	/* Check if the flowid is set. */
	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
		i = m->m_pkthdr.flowid % npairs;
	else
		i = curcpu % npairs;

	txq = &np->txq[i];

	if (XN_TX_TRYLOCK(txq) != 0) {
		error = xn_txq_mq_start_locked(txq, m);
		XN_TX_UNLOCK(txq);
	} else {
		error = drbr_enqueue(ifp, txq->br, m);
		taskqueue_enqueue(txq->tq, &txq->defrtask);
	}

	return (error);
}

static void
xn_qflush(struct ifnet *ifp)
{
	struct netfront_info *np;
	struct netfront_txq *txq;
	struct mbuf *m;
	int i;

	np = ifp->if_softc;

	for (i = 0; i < np->num_queues; i++) {
		txq = &np->txq[i];

		XN_TX_LOCK(txq);
		while ((m = buf_ring_dequeue_sc(txq->br)) != NULL)
			m_freem(m);
		XN_TX_UNLOCK(txq);
	}

	if_qflush(ifp);
}
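/*
 * Transmit path overview: xn_txq_mq_start() above hashes each packet to a
 * queue pair (by flowid when present, otherwise by CPU) and either drains
 * that queue's buf_ring directly or, when the TX lock is contended, defers
 * the work to the queue's taskqueue via defrtask.  For example, with four
 * queue pairs a packet carrying flowid 6 is handed to txq[2].
 */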
/**
 * Create a network device.
 * @param dev  Newbus device representing this virtual NIC.
 */
static int
create_netdev(device_t dev)
{
	struct netfront_info *np;
	int err;
	struct ifnet *ifp;

	np = device_get_softc(dev);

	np->xbdev = dev;

	mtx_init(&np->sc_lock, "xnsc", "netfront softc lock", MTX_DEF);

	ifmedia_init(&np->sc_media, 0, xn_ifmedia_upd, xn_ifmedia_sts);
	ifmedia_add(&np->sc_media, IFM_ETHER|IFM_MANUAL, 0, NULL);
	ifmedia_set(&np->sc_media, IFM_ETHER|IFM_MANUAL);

	err = xen_net_read_mac(dev, np->mac);
	if (err != 0)
		goto error;

	/* Set up the ifnet structure. */
	ifp = np->xn_ifp = if_alloc(IFT_ETHER);
	ifp->if_softc = np;
	if_initname(ifp, "xn", device_get_unit(dev));
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
	ifp->if_ioctl = xn_ioctl;

	ifp->if_transmit = xn_txq_mq_start;
	ifp->if_qflush = xn_qflush;

	ifp->if_init = xn_ifinit;

	ifp->if_hwassist = XN_CSUM_FEATURES;
	/* Enable all supported features at device creation. */
	ifp->if_capenable = ifp->if_capabilities =
	    IFCAP_HWCSUM|IFCAP_TSO4|IFCAP_LRO;
	ifp->if_hw_tsomax = 65536 - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN);
	ifp->if_hw_tsomaxsegcount = MAX_TX_REQ_FRAGS;
	ifp->if_hw_tsomaxsegsize = PAGE_SIZE;

	ether_ifattach(ifp, np->mac);
	netfront_carrier_off(np);

	err = bus_dma_tag_create(
	    bus_get_dma_tag(dev),		/* parent */
	    1, PAGE_SIZE,			/* algnmnt, boundary */
	    BUS_SPACE_MAXADDR,			/* lowaddr */
	    BUS_SPACE_MAXADDR,			/* highaddr */
	    NULL, NULL,				/* filter, filterarg */
	    PAGE_SIZE * MAX_TX_REQ_FRAGS,	/* max request size */
	    MAX_TX_REQ_FRAGS,			/* max segments */
	    PAGE_SIZE,				/* maxsegsize */
	    BUS_DMA_ALLOCNOW,			/* flags */
	    NULL, NULL,				/* lockfunc, lockarg */
	    &np->dma_tag);

	return (err);

error:
	KASSERT(err != 0, ("Error path with no error code specified"));
	return (err);
}

static int
netfront_detach(device_t dev)
{
	struct netfront_info *info = device_get_softc(dev);

	DPRINTK("%s\n", xenbus_get_node(dev));

	netif_free(info);

	return (0);
}

static void
netif_free(struct netfront_info *np)
{

	XN_LOCK(np);
	xn_stop(np);
	XN_UNLOCK(np);
	netif_disconnect_backend(np);
	ether_ifdetach(np->xn_ifp);
	free(np->rxq, M_DEVBUF);
	free(np->txq, M_DEVBUF);
	if_free(np->xn_ifp);
	np->xn_ifp = NULL;
	ifmedia_removeall(&np->sc_media);
	bus_dma_tag_destroy(np->dma_tag);
}

static void
netif_disconnect_backend(struct netfront_info *np)
{
	u_int i;

	for (i = 0; i < np->num_queues; i++) {
		XN_RX_LOCK(&np->rxq[i]);
		XN_TX_LOCK(&np->txq[i]);
	}
	netfront_carrier_off(np);
	for (i = 0; i < np->num_queues; i++) {
		XN_RX_UNLOCK(&np->rxq[i]);
		XN_TX_UNLOCK(&np->txq[i]);
	}

	for (i = 0; i < np->num_queues; i++) {
		disconnect_rxq(&np->rxq[i]);
		disconnect_txq(&np->txq[i]);
	}
}

static int
xn_ifmedia_upd(struct ifnet *ifp)
{

	return (0);
}

static void
xn_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr)
{

	ifmr->ifm_status = IFM_AVALID|IFM_ACTIVE;
	ifmr->ifm_active = IFM_ETHER|IFM_MANUAL;
}
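/*
 * Usage note (illustrative): a paravirtual vif has no physical media to
 * sense, so xn_ifmedia_upd() and xn_ifmedia_sts() above simply report a
 * fixed "manual" media word that is always active.  Interfaces attach as
 * xn0, xn1, ... (see if_initname() in create_netdev()), so for example
 * "ifconfig xn0" lists the media as Ethernet manual.
 */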
/* ** Driver registration ** */
static device_method_t netfront_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		netfront_probe),
	DEVMETHOD(device_attach,	netfront_attach),
	DEVMETHOD(device_detach,	netfront_detach),
	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
	DEVMETHOD(device_suspend,	netfront_suspend),
	DEVMETHOD(device_resume,	netfront_resume),

	/* Xenbus interface */
	DEVMETHOD(xenbus_otherend_changed, netfront_backend_changed),

	DEVMETHOD_END
};

static driver_t netfront_driver = {
	"xn",
	netfront_methods,
	sizeof(struct netfront_info),
};

DRIVER_MODULE(xe, xenbusb_front, netfront_driver, NULL, NULL);