1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2004-2006 Kip Macy 5 * Copyright (c) 2015 Wei Liu <wei.liu2@citrix.com> 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 */ 29 30 #include <sys/cdefs.h> 31 __FBSDID("$FreeBSD$"); 32 33 #include "opt_inet.h" 34 #include "opt_inet6.h" 35 36 #include <sys/param.h> 37 #include <sys/sockio.h> 38 #include <sys/limits.h> 39 #include <sys/mbuf.h> 40 #include <sys/malloc.h> 41 #include <sys/module.h> 42 #include <sys/kernel.h> 43 #include <sys/socket.h> 44 #include <sys/sysctl.h> 45 #include <sys/taskqueue.h> 46 47 #include <net/if.h> 48 #include <net/if_var.h> 49 #include <net/if_arp.h> 50 #include <net/ethernet.h> 51 #include <net/if_media.h> 52 #include <net/bpf.h> 53 #include <net/if_types.h> 54 55 #include <netinet/in.h> 56 #include <netinet/ip.h> 57 #include <netinet/if_ether.h> 58 #include <netinet/tcp.h> 59 #include <netinet/tcp_lro.h> 60 61 #include <vm/vm.h> 62 #include <vm/pmap.h> 63 64 #include <sys/bus.h> 65 66 #include <xen/xen-os.h> 67 #include <xen/hypervisor.h> 68 #include <xen/xen_intr.h> 69 #include <xen/gnttab.h> 70 #include <contrib/xen/memory.h> 71 #include <contrib/xen/io/netif.h> 72 #include <xen/xenbus/xenbusvar.h> 73 74 #include <machine/bus.h> 75 76 #include "xenbus_if.h" 77 78 /* Features supported by all backends. TSO and LRO can be negotiated */ 79 #define XN_CSUM_FEATURES (CSUM_TCP | CSUM_UDP) 80 81 #define NET_TX_RING_SIZE __CONST_RING_SIZE(netif_tx, PAGE_SIZE) 82 #define NET_RX_RING_SIZE __CONST_RING_SIZE(netif_rx, PAGE_SIZE) 83 84 #define NET_RX_SLOTS_MIN (XEN_NETIF_NR_SLOTS_MIN + 1) 85 86 /* 87 * Should the driver do LRO on the RX end 88 * this can be toggled on the fly, but the 89 * interface must be reset (down/up) for it 90 * to take effect. 91 */ 92 static int xn_enable_lro = 1; 93 TUNABLE_INT("hw.xn.enable_lro", &xn_enable_lro); 94 95 /* 96 * Number of pairs of queues. 97 */ 98 static unsigned long xn_num_queues = 4; 99 TUNABLE_ULONG("hw.xn.num_queues", &xn_num_queues); 100 101 /** 102 * \brief The maximum allowed data fragments in a single transmit 103 * request. 104 * 105 * This limit is imposed by the backend driver. We assume here that 106 * we are dealing with a Linux driver domain and have set our limit 107 * to mirror the Linux MAX_SKB_FRAGS constant. 108 */ 109 #define MAX_TX_REQ_FRAGS (65536 / PAGE_SIZE + 2) 110 111 #define RX_COPY_THRESHOLD 256 112 113 #define net_ratelimit() 0 114 115 struct netfront_rxq; 116 struct netfront_txq; 117 struct netfront_info; 118 struct netfront_rx_info; 119 120 static void xn_txeof(struct netfront_txq *); 121 static void xn_rxeof(struct netfront_rxq *); 122 static void xn_alloc_rx_buffers(struct netfront_rxq *); 123 static void xn_alloc_rx_buffers_callout(void *arg); 124 125 static void xn_release_rx_bufs(struct netfront_rxq *); 126 static void xn_release_tx_bufs(struct netfront_txq *); 127 128 static void xn_rxq_intr(struct netfront_rxq *); 129 static void xn_txq_intr(struct netfront_txq *); 130 static void xn_intr(void *); 131 static int xn_assemble_tx_request(struct netfront_txq *, struct mbuf *); 132 static int xn_ioctl(if_t, u_long, caddr_t); 133 static void xn_ifinit_locked(struct netfront_info *); 134 static void xn_ifinit(void *); 135 static void xn_stop(struct netfront_info *); 136 static void xn_query_features(struct netfront_info *np); 137 static int xn_configure_features(struct netfront_info *np); 138 static void netif_free(struct netfront_info *info); 139 static int netfront_detach(device_t dev); 140 141 static int xn_txq_mq_start_locked(struct netfront_txq *, struct mbuf *); 142 static int xn_txq_mq_start(if_t, struct mbuf *); 143 144 static int talk_to_backend(device_t dev, struct netfront_info *info); 145 static int create_netdev(device_t dev); 146 static void netif_disconnect_backend(struct netfront_info *info); 147 static int setup_device(device_t dev, struct netfront_info *info, 148 unsigned long); 149 static int xn_ifmedia_upd(if_t ifp); 150 static void xn_ifmedia_sts(if_t ifp, struct ifmediareq *ifmr); 151 152 static int xn_connect(struct netfront_info *); 153 static void xn_kick_rings(struct netfront_info *); 154 155 static int xn_get_responses(struct netfront_rxq *, 156 struct netfront_rx_info *, RING_IDX, RING_IDX *, 157 struct mbuf **); 158 159 #define virt_to_mfn(x) (vtophys(x) >> PAGE_SHIFT) 160 161 #define INVALID_P2M_ENTRY (~0UL) 162 #define XN_QUEUE_NAME_LEN 8 /* xn{t,r}x_%u, allow for two digits */ 163 struct netfront_rxq { 164 struct netfront_info *info; 165 u_int id; 166 char name[XN_QUEUE_NAME_LEN]; 167 struct mtx lock; 168 169 int ring_ref; 170 netif_rx_front_ring_t ring; 171 xen_intr_handle_t xen_intr_handle; 172 173 grant_ref_t gref_head; 174 grant_ref_t grant_ref[NET_RX_RING_SIZE + 1]; 175 176 struct mbuf *mbufs[NET_RX_RING_SIZE + 1]; 177 178 struct lro_ctrl lro; 179 180 struct callout rx_refill; 181 }; 182 183 struct netfront_txq { 184 struct netfront_info *info; 185 u_int id; 186 char name[XN_QUEUE_NAME_LEN]; 187 struct mtx lock; 188 189 int ring_ref; 190 netif_tx_front_ring_t ring; 191 xen_intr_handle_t xen_intr_handle; 192 193 grant_ref_t gref_head; 194 grant_ref_t grant_ref[NET_TX_RING_SIZE + 1]; 195 196 struct mbuf *mbufs[NET_TX_RING_SIZE + 1]; 197 int mbufs_cnt; 198 struct buf_ring *br; 199 200 struct taskqueue *tq; 201 struct task defrtask; 202 203 bus_dma_segment_t segs[MAX_TX_REQ_FRAGS]; 204 struct mbuf_xennet { 205 struct m_tag tag; 206 bus_dma_tag_t dma_tag; 207 bus_dmamap_t dma_map; 208 struct netfront_txq *txq; 209 SLIST_ENTRY(mbuf_xennet) next; 210 u_int count; 211 } xennet_tag[NET_TX_RING_SIZE + 1]; 212 SLIST_HEAD(, mbuf_xennet) tags; 213 214 bool full; 215 }; 216 217 struct netfront_info { 218 if_t xn_ifp; 219 220 struct mtx sc_lock; 221 222 u_int num_queues; 223 struct netfront_rxq *rxq; 224 struct netfront_txq *txq; 225 226 u_int carrier; 227 u_int maxfrags; 228 229 device_t xbdev; 230 uint8_t mac[ETHER_ADDR_LEN]; 231 232 int xn_if_flags; 233 234 struct ifmedia sc_media; 235 236 bus_dma_tag_t dma_tag; 237 238 bool xn_reset; 239 }; 240 241 struct netfront_rx_info { 242 struct netif_rx_response rx; 243 struct netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1]; 244 }; 245 246 #define XN_RX_LOCK(_q) mtx_lock(&(_q)->lock) 247 #define XN_RX_UNLOCK(_q) mtx_unlock(&(_q)->lock) 248 249 #define XN_TX_LOCK(_q) mtx_lock(&(_q)->lock) 250 #define XN_TX_TRYLOCK(_q) mtx_trylock(&(_q)->lock) 251 #define XN_TX_UNLOCK(_q) mtx_unlock(&(_q)->lock) 252 253 #define XN_LOCK(_sc) mtx_lock(&(_sc)->sc_lock); 254 #define XN_UNLOCK(_sc) mtx_unlock(&(_sc)->sc_lock); 255 256 #define XN_LOCK_ASSERT(_sc) mtx_assert(&(_sc)->sc_lock, MA_OWNED); 257 #define XN_RX_LOCK_ASSERT(_q) mtx_assert(&(_q)->lock, MA_OWNED); 258 #define XN_TX_LOCK_ASSERT(_q) mtx_assert(&(_q)->lock, MA_OWNED); 259 260 #define netfront_carrier_on(netif) ((netif)->carrier = 1) 261 #define netfront_carrier_off(netif) ((netif)->carrier = 0) 262 #define netfront_carrier_ok(netif) ((netif)->carrier) 263 264 /* Access macros for acquiring freeing slots in xn_free_{tx,rx}_idxs[]. */ 265 266 static inline void 267 add_id_to_freelist(struct mbuf **list, uintptr_t id) 268 { 269 270 KASSERT(id != 0, 271 ("%s: the head item (0) must always be free.", __func__)); 272 list[id] = list[0]; 273 list[0] = (struct mbuf *)id; 274 } 275 276 static inline unsigned short 277 get_id_from_freelist(struct mbuf **list) 278 { 279 uintptr_t id; 280 281 id = (uintptr_t)list[0]; 282 KASSERT(id != 0, 283 ("%s: the head item (0) must always remain free.", __func__)); 284 list[0] = list[id]; 285 return (id); 286 } 287 288 static inline int 289 xn_rxidx(RING_IDX idx) 290 { 291 292 return idx & (NET_RX_RING_SIZE - 1); 293 } 294 295 static inline struct mbuf * 296 xn_get_rx_mbuf(struct netfront_rxq *rxq, RING_IDX ri) 297 { 298 int i; 299 struct mbuf *m; 300 301 i = xn_rxidx(ri); 302 m = rxq->mbufs[i]; 303 rxq->mbufs[i] = NULL; 304 return (m); 305 } 306 307 static inline grant_ref_t 308 xn_get_rx_ref(struct netfront_rxq *rxq, RING_IDX ri) 309 { 310 int i = xn_rxidx(ri); 311 grant_ref_t ref = rxq->grant_ref[i]; 312 313 KASSERT(ref != GRANT_REF_INVALID, ("Invalid grant reference!\n")); 314 rxq->grant_ref[i] = GRANT_REF_INVALID; 315 return (ref); 316 } 317 318 #define MTAG_COOKIE 1218492000 319 #define MTAG_XENNET 0 320 321 static void mbuf_grab(struct mbuf *m) 322 { 323 struct mbuf_xennet *ref; 324 325 ref = (struct mbuf_xennet *)m_tag_locate(m, MTAG_COOKIE, 326 MTAG_XENNET, NULL); 327 KASSERT(ref != NULL, ("Cannot find refcount")); 328 ref->count++; 329 } 330 331 static void mbuf_release(struct mbuf *m) 332 { 333 struct mbuf_xennet *ref; 334 335 ref = (struct mbuf_xennet *)m_tag_locate(m, MTAG_COOKIE, 336 MTAG_XENNET, NULL); 337 KASSERT(ref != NULL, ("Cannot find refcount")); 338 KASSERT(ref->count > 0, ("Invalid reference count")); 339 340 if (--ref->count == 0) 341 m_freem(m); 342 } 343 344 static void tag_free(struct m_tag *t) 345 { 346 struct mbuf_xennet *ref = (struct mbuf_xennet *)t; 347 348 KASSERT(ref->count == 0, ("Free mbuf tag with pending refcnt")); 349 bus_dmamap_sync(ref->dma_tag, ref->dma_map, BUS_DMASYNC_POSTWRITE); 350 bus_dmamap_destroy(ref->dma_tag, ref->dma_map); 351 SLIST_INSERT_HEAD(&ref->txq->tags, ref, next); 352 } 353 354 #define IPRINTK(fmt, args...) \ 355 printf("[XEN] " fmt, ##args) 356 #ifdef INVARIANTS 357 #define WPRINTK(fmt, args...) \ 358 printf("[XEN] " fmt, ##args) 359 #else 360 #define WPRINTK(fmt, args...) 361 #endif 362 #ifdef DEBUG 363 #define DPRINTK(fmt, args...) \ 364 printf("[XEN] %s: " fmt, __func__, ##args) 365 #else 366 #define DPRINTK(fmt, args...) 367 #endif 368 369 /** 370 * Read the 'mac' node at the given device's node in the store, and parse that 371 * as colon-separated octets, placing result the given mac array. mac must be 372 * a preallocated array of length ETH_ALEN (as declared in linux/if_ether.h). 373 * Return 0 on success, or errno on error. 374 */ 375 static int 376 xen_net_read_mac(device_t dev, uint8_t mac[]) 377 { 378 int error, i; 379 char *s, *e, *macstr; 380 const char *path; 381 382 path = xenbus_get_node(dev); 383 error = xs_read(XST_NIL, path, "mac", NULL, (void **) &macstr); 384 if (error == ENOENT) { 385 /* 386 * Deal with missing mac XenStore nodes on devices with 387 * HVM emulation (the 'ioemu' configuration attribute) 388 * enabled. 389 * 390 * The HVM emulator may execute in a stub device model 391 * domain which lacks the permission, only given to Dom0, 392 * to update the guest's XenStore tree. For this reason, 393 * the HVM emulator doesn't even attempt to write the 394 * front-side mac node, even when operating in Dom0. 395 * However, there should always be a mac listed in the 396 * backend tree. Fallback to this version if our query 397 * of the front side XenStore location doesn't find 398 * anything. 399 */ 400 path = xenbus_get_otherend_path(dev); 401 error = xs_read(XST_NIL, path, "mac", NULL, (void **) &macstr); 402 } 403 if (error != 0) { 404 xenbus_dev_fatal(dev, error, "parsing %s/mac", path); 405 return (error); 406 } 407 408 s = macstr; 409 for (i = 0; i < ETHER_ADDR_LEN; i++) { 410 mac[i] = strtoul(s, &e, 16); 411 if (s == e || (e[0] != ':' && e[0] != 0)) { 412 free(macstr, M_XENBUS); 413 return (ENOENT); 414 } 415 s = &e[1]; 416 } 417 free(macstr, M_XENBUS); 418 return (0); 419 } 420 421 /** 422 * Entry point to this code when a new device is created. Allocate the basic 423 * structures and the ring buffers for communication with the backend, and 424 * inform the backend of the appropriate details for those. Switch to 425 * Connected state. 426 */ 427 static int 428 netfront_probe(device_t dev) 429 { 430 431 if (xen_pv_nics_disabled()) 432 return (ENXIO); 433 434 if (!strcmp(xenbus_get_type(dev), "vif")) { 435 device_set_desc(dev, "Virtual Network Interface"); 436 return (0); 437 } 438 439 return (ENXIO); 440 } 441 442 static int 443 netfront_attach(device_t dev) 444 { 445 int err; 446 447 err = create_netdev(dev); 448 if (err != 0) { 449 xenbus_dev_fatal(dev, err, "creating netdev"); 450 return (err); 451 } 452 453 SYSCTL_ADD_INT(device_get_sysctl_ctx(dev), 454 SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), 455 OID_AUTO, "enable_lro", CTLFLAG_RW, 456 &xn_enable_lro, 0, "Large Receive Offload"); 457 458 SYSCTL_ADD_ULONG(device_get_sysctl_ctx(dev), 459 SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), 460 OID_AUTO, "num_queues", CTLFLAG_RD, 461 &xn_num_queues, "Number of pairs of queues"); 462 463 return (0); 464 } 465 466 static int 467 netfront_suspend(device_t dev) 468 { 469 struct netfront_info *np = device_get_softc(dev); 470 u_int i; 471 472 for (i = 0; i < np->num_queues; i++) { 473 XN_RX_LOCK(&np->rxq[i]); 474 XN_TX_LOCK(&np->txq[i]); 475 } 476 netfront_carrier_off(np); 477 for (i = 0; i < np->num_queues; i++) { 478 XN_RX_UNLOCK(&np->rxq[i]); 479 XN_TX_UNLOCK(&np->txq[i]); 480 } 481 return (0); 482 } 483 484 /** 485 * We are reconnecting to the backend, due to a suspend/resume, or a backend 486 * driver restart. We tear down our netif structure and recreate it, but 487 * leave the device-layer structures intact so that this is transparent to the 488 * rest of the kernel. 489 */ 490 static int 491 netfront_resume(device_t dev) 492 { 493 struct netfront_info *info = device_get_softc(dev); 494 u_int i; 495 496 if (xen_suspend_cancelled) { 497 for (i = 0; i < info->num_queues; i++) { 498 XN_RX_LOCK(&info->rxq[i]); 499 XN_TX_LOCK(&info->txq[i]); 500 } 501 netfront_carrier_on(info); 502 for (i = 0; i < info->num_queues; i++) { 503 XN_RX_UNLOCK(&info->rxq[i]); 504 XN_TX_UNLOCK(&info->txq[i]); 505 } 506 return (0); 507 } 508 509 netif_disconnect_backend(info); 510 return (0); 511 } 512 513 static int 514 write_queue_xenstore_keys(device_t dev, 515 struct netfront_rxq *rxq, 516 struct netfront_txq *txq, 517 struct xs_transaction *xst, bool hierarchy) 518 { 519 int err; 520 const char *message; 521 const char *node = xenbus_get_node(dev); 522 char *path; 523 size_t path_size; 524 525 KASSERT(rxq->id == txq->id, ("Mismatch between RX and TX queue ids")); 526 /* Split event channel support is not yet there. */ 527 KASSERT(rxq->xen_intr_handle == txq->xen_intr_handle, 528 ("Split event channels are not supported")); 529 530 if (hierarchy) { 531 path_size = strlen(node) + 10; 532 path = malloc(path_size, M_DEVBUF, M_WAITOK|M_ZERO); 533 snprintf(path, path_size, "%s/queue-%u", node, rxq->id); 534 } else { 535 path_size = strlen(node) + 1; 536 path = malloc(path_size, M_DEVBUF, M_WAITOK|M_ZERO); 537 snprintf(path, path_size, "%s", node); 538 } 539 540 err = xs_printf(*xst, path, "tx-ring-ref","%u", txq->ring_ref); 541 if (err != 0) { 542 message = "writing tx ring-ref"; 543 goto error; 544 } 545 err = xs_printf(*xst, path, "rx-ring-ref","%u", rxq->ring_ref); 546 if (err != 0) { 547 message = "writing rx ring-ref"; 548 goto error; 549 } 550 err = xs_printf(*xst, path, "event-channel", "%u", 551 xen_intr_port(rxq->xen_intr_handle)); 552 if (err != 0) { 553 message = "writing event-channel"; 554 goto error; 555 } 556 557 free(path, M_DEVBUF); 558 559 return (0); 560 561 error: 562 free(path, M_DEVBUF); 563 xenbus_dev_fatal(dev, err, "%s", message); 564 565 return (err); 566 } 567 568 /* Common code used when first setting up, and when resuming. */ 569 static int 570 talk_to_backend(device_t dev, struct netfront_info *info) 571 { 572 const char *message; 573 struct xs_transaction xst; 574 const char *node = xenbus_get_node(dev); 575 int err; 576 unsigned long num_queues, max_queues = 0; 577 unsigned int i; 578 579 err = xen_net_read_mac(dev, info->mac); 580 if (err != 0) { 581 xenbus_dev_fatal(dev, err, "parsing %s/mac", node); 582 goto out; 583 } 584 585 err = xs_scanf(XST_NIL, xenbus_get_otherend_path(info->xbdev), 586 "multi-queue-max-queues", NULL, "%lu", &max_queues); 587 if (err != 0) 588 max_queues = 1; 589 num_queues = xn_num_queues; 590 if (num_queues > max_queues) 591 num_queues = max_queues; 592 593 err = setup_device(dev, info, num_queues); 594 if (err != 0) 595 goto out; 596 597 again: 598 err = xs_transaction_start(&xst); 599 if (err != 0) { 600 xenbus_dev_fatal(dev, err, "starting transaction"); 601 goto free; 602 } 603 604 if (info->num_queues == 1) { 605 err = write_queue_xenstore_keys(dev, &info->rxq[0], 606 &info->txq[0], &xst, false); 607 if (err != 0) 608 goto abort_transaction_no_def_error; 609 } else { 610 err = xs_printf(xst, node, "multi-queue-num-queues", 611 "%u", info->num_queues); 612 if (err != 0) { 613 message = "writing multi-queue-num-queues"; 614 goto abort_transaction; 615 } 616 617 for (i = 0; i < info->num_queues; i++) { 618 err = write_queue_xenstore_keys(dev, &info->rxq[i], 619 &info->txq[i], &xst, true); 620 if (err != 0) 621 goto abort_transaction_no_def_error; 622 } 623 } 624 625 err = xs_printf(xst, node, "request-rx-copy", "%u", 1); 626 if (err != 0) { 627 message = "writing request-rx-copy"; 628 goto abort_transaction; 629 } 630 err = xs_printf(xst, node, "feature-rx-notify", "%d", 1); 631 if (err != 0) { 632 message = "writing feature-rx-notify"; 633 goto abort_transaction; 634 } 635 err = xs_printf(xst, node, "feature-sg", "%d", 1); 636 if (err != 0) { 637 message = "writing feature-sg"; 638 goto abort_transaction; 639 } 640 if ((if_getcapenable(info->xn_ifp) & IFCAP_LRO) != 0) { 641 err = xs_printf(xst, node, "feature-gso-tcpv4", "%d", 1); 642 if (err != 0) { 643 message = "writing feature-gso-tcpv4"; 644 goto abort_transaction; 645 } 646 } 647 if ((if_getcapenable(info->xn_ifp) & IFCAP_RXCSUM) == 0) { 648 err = xs_printf(xst, node, "feature-no-csum-offload", "%d", 1); 649 if (err != 0) { 650 message = "writing feature-no-csum-offload"; 651 goto abort_transaction; 652 } 653 } 654 655 err = xs_transaction_end(xst, 0); 656 if (err != 0) { 657 if (err == EAGAIN) 658 goto again; 659 xenbus_dev_fatal(dev, err, "completing transaction"); 660 goto free; 661 } 662 663 return 0; 664 665 abort_transaction: 666 xenbus_dev_fatal(dev, err, "%s", message); 667 abort_transaction_no_def_error: 668 xs_transaction_end(xst, 1); 669 free: 670 netif_free(info); 671 out: 672 return (err); 673 } 674 675 static void 676 xn_rxq_intr(struct netfront_rxq *rxq) 677 { 678 679 XN_RX_LOCK(rxq); 680 xn_rxeof(rxq); 681 XN_RX_UNLOCK(rxq); 682 } 683 684 static void 685 xn_txq_start(struct netfront_txq *txq) 686 { 687 struct netfront_info *np = txq->info; 688 if_t ifp = np->xn_ifp; 689 690 XN_TX_LOCK_ASSERT(txq); 691 if (!drbr_empty(ifp, txq->br)) 692 xn_txq_mq_start_locked(txq, NULL); 693 } 694 695 static void 696 xn_txq_intr(struct netfront_txq *txq) 697 { 698 699 XN_TX_LOCK(txq); 700 if (RING_HAS_UNCONSUMED_RESPONSES(&txq->ring)) 701 xn_txeof(txq); 702 xn_txq_start(txq); 703 XN_TX_UNLOCK(txq); 704 } 705 706 static void 707 xn_txq_tq_deferred(void *xtxq, int pending) 708 { 709 struct netfront_txq *txq = xtxq; 710 711 XN_TX_LOCK(txq); 712 xn_txq_start(txq); 713 XN_TX_UNLOCK(txq); 714 } 715 716 static void 717 disconnect_rxq(struct netfront_rxq *rxq) 718 { 719 720 xn_release_rx_bufs(rxq); 721 gnttab_free_grant_references(rxq->gref_head); 722 gnttab_end_foreign_access(rxq->ring_ref, NULL); 723 /* 724 * No split event channel support at the moment, handle will 725 * be unbound in tx. So no need to call xen_intr_unbind here, 726 * but we do want to reset the handler to 0. 727 */ 728 rxq->xen_intr_handle = 0; 729 } 730 731 static void 732 destroy_rxq(struct netfront_rxq *rxq) 733 { 734 735 callout_drain(&rxq->rx_refill); 736 free(rxq->ring.sring, M_DEVBUF); 737 } 738 739 static void 740 destroy_rxqs(struct netfront_info *np) 741 { 742 int i; 743 744 for (i = 0; i < np->num_queues; i++) 745 destroy_rxq(&np->rxq[i]); 746 747 free(np->rxq, M_DEVBUF); 748 np->rxq = NULL; 749 } 750 751 static int 752 setup_rxqs(device_t dev, struct netfront_info *info, 753 unsigned long num_queues) 754 { 755 int q, i; 756 int error; 757 netif_rx_sring_t *rxs; 758 struct netfront_rxq *rxq; 759 760 info->rxq = malloc(sizeof(struct netfront_rxq) * num_queues, 761 M_DEVBUF, M_WAITOK|M_ZERO); 762 763 for (q = 0; q < num_queues; q++) { 764 rxq = &info->rxq[q]; 765 766 rxq->id = q; 767 rxq->info = info; 768 rxq->ring_ref = GRANT_REF_INVALID; 769 rxq->ring.sring = NULL; 770 snprintf(rxq->name, XN_QUEUE_NAME_LEN, "xnrx_%u", q); 771 mtx_init(&rxq->lock, rxq->name, "netfront receive lock", 772 MTX_DEF); 773 774 for (i = 0; i <= NET_RX_RING_SIZE; i++) { 775 rxq->mbufs[i] = NULL; 776 rxq->grant_ref[i] = GRANT_REF_INVALID; 777 } 778 779 /* Start resources allocation */ 780 781 if (gnttab_alloc_grant_references(NET_RX_RING_SIZE, 782 &rxq->gref_head) != 0) { 783 device_printf(dev, "allocating rx gref"); 784 error = ENOMEM; 785 goto fail; 786 } 787 788 rxs = (netif_rx_sring_t *)malloc(PAGE_SIZE, M_DEVBUF, 789 M_WAITOK|M_ZERO); 790 SHARED_RING_INIT(rxs); 791 FRONT_RING_INIT(&rxq->ring, rxs, PAGE_SIZE); 792 793 error = xenbus_grant_ring(dev, virt_to_mfn(rxs), 794 &rxq->ring_ref); 795 if (error != 0) { 796 device_printf(dev, "granting rx ring page"); 797 goto fail_grant_ring; 798 } 799 800 callout_init(&rxq->rx_refill, 1); 801 } 802 803 return (0); 804 805 fail_grant_ring: 806 gnttab_free_grant_references(rxq->gref_head); 807 free(rxq->ring.sring, M_DEVBUF); 808 fail: 809 for (; q >= 0; q--) { 810 disconnect_rxq(&info->rxq[q]); 811 destroy_rxq(&info->rxq[q]); 812 } 813 814 free(info->rxq, M_DEVBUF); 815 return (error); 816 } 817 818 static void 819 disconnect_txq(struct netfront_txq *txq) 820 { 821 822 xn_release_tx_bufs(txq); 823 gnttab_free_grant_references(txq->gref_head); 824 gnttab_end_foreign_access(txq->ring_ref, NULL); 825 xen_intr_unbind(&txq->xen_intr_handle); 826 } 827 828 static void 829 destroy_txq(struct netfront_txq *txq) 830 { 831 unsigned int i; 832 833 free(txq->ring.sring, M_DEVBUF); 834 buf_ring_free(txq->br, M_DEVBUF); 835 taskqueue_drain_all(txq->tq); 836 taskqueue_free(txq->tq); 837 838 for (i = 0; i <= NET_TX_RING_SIZE; i++) { 839 bus_dmamap_destroy(txq->info->dma_tag, 840 txq->xennet_tag[i].dma_map); 841 txq->xennet_tag[i].dma_map = NULL; 842 } 843 } 844 845 static void 846 destroy_txqs(struct netfront_info *np) 847 { 848 int i; 849 850 for (i = 0; i < np->num_queues; i++) 851 destroy_txq(&np->txq[i]); 852 853 free(np->txq, M_DEVBUF); 854 np->txq = NULL; 855 } 856 857 static int 858 setup_txqs(device_t dev, struct netfront_info *info, 859 unsigned long num_queues) 860 { 861 int q, i; 862 int error; 863 netif_tx_sring_t *txs; 864 struct netfront_txq *txq; 865 866 info->txq = malloc(sizeof(struct netfront_txq) * num_queues, 867 M_DEVBUF, M_WAITOK|M_ZERO); 868 869 for (q = 0; q < num_queues; q++) { 870 txq = &info->txq[q]; 871 872 txq->id = q; 873 txq->info = info; 874 875 txq->ring_ref = GRANT_REF_INVALID; 876 txq->ring.sring = NULL; 877 878 snprintf(txq->name, XN_QUEUE_NAME_LEN, "xntx_%u", q); 879 880 mtx_init(&txq->lock, txq->name, "netfront transmit lock", 881 MTX_DEF); 882 SLIST_INIT(&txq->tags); 883 884 for (i = 0; i <= NET_TX_RING_SIZE; i++) { 885 txq->mbufs[i] = (void *) ((u_long) i+1); 886 txq->grant_ref[i] = GRANT_REF_INVALID; 887 txq->xennet_tag[i].txq = txq; 888 txq->xennet_tag[i].dma_tag = info->dma_tag; 889 error = bus_dmamap_create(info->dma_tag, 0, 890 &txq->xennet_tag[i].dma_map); 891 if (error != 0) { 892 device_printf(dev, 893 "failed to allocate dma map\n"); 894 goto fail; 895 } 896 m_tag_setup(&txq->xennet_tag[i].tag, 897 MTAG_COOKIE, MTAG_XENNET, 898 sizeof(txq->xennet_tag[i]) - 899 sizeof(txq->xennet_tag[i].tag)); 900 txq->xennet_tag[i].tag.m_tag_free = &tag_free; 901 SLIST_INSERT_HEAD(&txq->tags, &txq->xennet_tag[i], 902 next); 903 } 904 txq->mbufs[NET_TX_RING_SIZE] = (void *)0; 905 906 /* Start resources allocation. */ 907 908 if (gnttab_alloc_grant_references(NET_TX_RING_SIZE, 909 &txq->gref_head) != 0) { 910 device_printf(dev, "failed to allocate tx grant refs\n"); 911 error = ENOMEM; 912 goto fail; 913 } 914 915 txs = (netif_tx_sring_t *)malloc(PAGE_SIZE, M_DEVBUF, 916 M_WAITOK|M_ZERO); 917 SHARED_RING_INIT(txs); 918 FRONT_RING_INIT(&txq->ring, txs, PAGE_SIZE); 919 920 error = xenbus_grant_ring(dev, virt_to_mfn(txs), 921 &txq->ring_ref); 922 if (error != 0) { 923 device_printf(dev, "failed to grant tx ring\n"); 924 goto fail_grant_ring; 925 } 926 927 txq->br = buf_ring_alloc(NET_TX_RING_SIZE, M_DEVBUF, 928 M_WAITOK, &txq->lock); 929 TASK_INIT(&txq->defrtask, 0, xn_txq_tq_deferred, txq); 930 931 txq->tq = taskqueue_create(txq->name, M_WAITOK, 932 taskqueue_thread_enqueue, &txq->tq); 933 934 error = taskqueue_start_threads(&txq->tq, 1, PI_NET, 935 "%s txq %d", device_get_nameunit(dev), txq->id); 936 if (error != 0) { 937 device_printf(dev, "failed to start tx taskq %d\n", 938 txq->id); 939 goto fail_start_thread; 940 } 941 942 error = xen_intr_alloc_and_bind_local_port(dev, 943 xenbus_get_otherend_id(dev), /* filter */ NULL, xn_intr, 944 &info->txq[q], INTR_TYPE_NET | INTR_MPSAFE | INTR_ENTROPY, 945 &txq->xen_intr_handle); 946 947 if (error != 0) { 948 device_printf(dev, "xen_intr_alloc_and_bind_local_port failed\n"); 949 goto fail_bind_port; 950 } 951 } 952 953 return (0); 954 955 fail_bind_port: 956 taskqueue_drain_all(txq->tq); 957 fail_start_thread: 958 buf_ring_free(txq->br, M_DEVBUF); 959 taskqueue_free(txq->tq); 960 gnttab_end_foreign_access(txq->ring_ref, NULL); 961 fail_grant_ring: 962 gnttab_free_grant_references(txq->gref_head); 963 free(txq->ring.sring, M_DEVBUF); 964 fail: 965 for (; q >= 0; q--) { 966 disconnect_txq(&info->txq[q]); 967 destroy_txq(&info->txq[q]); 968 } 969 970 free(info->txq, M_DEVBUF); 971 return (error); 972 } 973 974 static int 975 setup_device(device_t dev, struct netfront_info *info, 976 unsigned long num_queues) 977 { 978 int error; 979 int q; 980 981 if (info->txq) 982 destroy_txqs(info); 983 984 if (info->rxq) 985 destroy_rxqs(info); 986 987 info->num_queues = 0; 988 989 error = setup_rxqs(dev, info, num_queues); 990 if (error != 0) 991 goto out; 992 error = setup_txqs(dev, info, num_queues); 993 if (error != 0) 994 goto out; 995 996 info->num_queues = num_queues; 997 998 /* No split event channel at the moment. */ 999 for (q = 0; q < num_queues; q++) 1000 info->rxq[q].xen_intr_handle = info->txq[q].xen_intr_handle; 1001 1002 return (0); 1003 1004 out: 1005 KASSERT(error != 0, ("Error path taken without providing an error code")); 1006 return (error); 1007 } 1008 1009 #ifdef INET 1010 static u_int 1011 netfront_addr_cb(void *arg, struct ifaddr *a, u_int count) 1012 { 1013 arp_ifinit((if_t)arg, a); 1014 return (1); 1015 } 1016 /** 1017 * If this interface has an ipv4 address, send an arp for it. This 1018 * helps to get the network going again after migrating hosts. 1019 */ 1020 static void 1021 netfront_send_fake_arp(device_t dev, struct netfront_info *info) 1022 { 1023 if_t ifp; 1024 1025 ifp = info->xn_ifp; 1026 if_foreach_addr_type(ifp, AF_INET, netfront_addr_cb, ifp); 1027 } 1028 #endif 1029 1030 /** 1031 * Callback received when the backend's state changes. 1032 */ 1033 static void 1034 netfront_backend_changed(device_t dev, XenbusState newstate) 1035 { 1036 struct netfront_info *sc = device_get_softc(dev); 1037 1038 DPRINTK("newstate=%d\n", newstate); 1039 1040 CURVNET_SET(if_getvnet(sc->xn_ifp)); 1041 1042 switch (newstate) { 1043 case XenbusStateInitialising: 1044 case XenbusStateInitialised: 1045 case XenbusStateUnknown: 1046 case XenbusStateReconfigured: 1047 case XenbusStateReconfiguring: 1048 break; 1049 case XenbusStateInitWait: 1050 if (xenbus_get_state(dev) != XenbusStateInitialising) 1051 break; 1052 if (xn_connect(sc) != 0) 1053 break; 1054 /* Switch to connected state before kicking the rings. */ 1055 xenbus_set_state(sc->xbdev, XenbusStateConnected); 1056 xn_kick_rings(sc); 1057 break; 1058 case XenbusStateClosing: 1059 xenbus_set_state(dev, XenbusStateClosed); 1060 break; 1061 case XenbusStateClosed: 1062 if (sc->xn_reset) { 1063 netif_disconnect_backend(sc); 1064 xenbus_set_state(dev, XenbusStateInitialising); 1065 sc->xn_reset = false; 1066 } 1067 break; 1068 case XenbusStateConnected: 1069 #ifdef INET 1070 netfront_send_fake_arp(dev, sc); 1071 #endif 1072 break; 1073 } 1074 1075 CURVNET_RESTORE(); 1076 } 1077 1078 /** 1079 * \brief Verify that there is sufficient space in the Tx ring 1080 * buffer for a maximally sized request to be enqueued. 1081 * 1082 * A transmit request requires a transmit descriptor for each packet 1083 * fragment, plus up to 2 entries for "options" (e.g. TSO). 1084 */ 1085 static inline int 1086 xn_tx_slot_available(struct netfront_txq *txq) 1087 { 1088 1089 return (RING_FREE_REQUESTS(&txq->ring) > (MAX_TX_REQ_FRAGS + 2)); 1090 } 1091 1092 static void 1093 xn_release_tx_bufs(struct netfront_txq *txq) 1094 { 1095 int i; 1096 1097 for (i = 1; i <= NET_TX_RING_SIZE; i++) { 1098 struct mbuf *m; 1099 1100 m = txq->mbufs[i]; 1101 1102 /* 1103 * We assume that no kernel addresses are 1104 * less than NET_TX_RING_SIZE. Any entry 1105 * in the table that is below this number 1106 * must be an index from free-list tracking. 1107 */ 1108 if (((uintptr_t)m) <= NET_TX_RING_SIZE) 1109 continue; 1110 gnttab_end_foreign_access_ref(txq->grant_ref[i]); 1111 gnttab_release_grant_reference(&txq->gref_head, 1112 txq->grant_ref[i]); 1113 txq->grant_ref[i] = GRANT_REF_INVALID; 1114 add_id_to_freelist(txq->mbufs, i); 1115 txq->mbufs_cnt--; 1116 if (txq->mbufs_cnt < 0) { 1117 panic("%s: tx_chain_cnt must be >= 0", __func__); 1118 } 1119 mbuf_release(m); 1120 } 1121 } 1122 1123 static struct mbuf * 1124 xn_alloc_one_rx_buffer(struct netfront_rxq *rxq) 1125 { 1126 struct mbuf *m; 1127 1128 m = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, MJUMPAGESIZE); 1129 if (m == NULL) 1130 return NULL; 1131 m->m_len = m->m_pkthdr.len = MJUMPAGESIZE; 1132 1133 return (m); 1134 } 1135 1136 static void 1137 xn_alloc_rx_buffers(struct netfront_rxq *rxq) 1138 { 1139 RING_IDX req_prod; 1140 int notify; 1141 1142 XN_RX_LOCK_ASSERT(rxq); 1143 1144 if (__predict_false(rxq->info->carrier == 0)) 1145 return; 1146 1147 for (req_prod = rxq->ring.req_prod_pvt; 1148 req_prod - rxq->ring.rsp_cons < NET_RX_RING_SIZE; 1149 req_prod++) { 1150 struct mbuf *m; 1151 unsigned short id; 1152 grant_ref_t ref; 1153 struct netif_rx_request *req; 1154 unsigned long pfn; 1155 1156 m = xn_alloc_one_rx_buffer(rxq); 1157 if (m == NULL) 1158 break; 1159 1160 id = xn_rxidx(req_prod); 1161 1162 KASSERT(rxq->mbufs[id] == NULL, ("non-NULL xn_rx_chain")); 1163 rxq->mbufs[id] = m; 1164 1165 ref = gnttab_claim_grant_reference(&rxq->gref_head); 1166 KASSERT(ref != GNTTAB_LIST_END, 1167 ("reserved grant references exhuasted")); 1168 rxq->grant_ref[id] = ref; 1169 1170 pfn = atop(vtophys(mtod(m, vm_offset_t))); 1171 req = RING_GET_REQUEST(&rxq->ring, req_prod); 1172 1173 gnttab_grant_foreign_access_ref(ref, 1174 xenbus_get_otherend_id(rxq->info->xbdev), pfn, 0); 1175 req->id = id; 1176 req->gref = ref; 1177 } 1178 1179 rxq->ring.req_prod_pvt = req_prod; 1180 1181 /* Not enough requests? Try again later. */ 1182 if (req_prod - rxq->ring.rsp_cons < NET_RX_SLOTS_MIN) { 1183 callout_reset_curcpu(&rxq->rx_refill, hz/10, 1184 xn_alloc_rx_buffers_callout, rxq); 1185 return; 1186 } 1187 1188 wmb(); /* barrier so backend seens requests */ 1189 1190 RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&rxq->ring, notify); 1191 if (notify) 1192 xen_intr_signal(rxq->xen_intr_handle); 1193 } 1194 1195 static void xn_alloc_rx_buffers_callout(void *arg) 1196 { 1197 struct netfront_rxq *rxq; 1198 1199 rxq = (struct netfront_rxq *)arg; 1200 XN_RX_LOCK(rxq); 1201 xn_alloc_rx_buffers(rxq); 1202 XN_RX_UNLOCK(rxq); 1203 } 1204 1205 static void 1206 xn_release_rx_bufs(struct netfront_rxq *rxq) 1207 { 1208 int i, ref; 1209 struct mbuf *m; 1210 1211 for (i = 0; i < NET_RX_RING_SIZE; i++) { 1212 m = rxq->mbufs[i]; 1213 1214 if (m == NULL) 1215 continue; 1216 1217 ref = rxq->grant_ref[i]; 1218 if (ref == GRANT_REF_INVALID) 1219 continue; 1220 1221 gnttab_end_foreign_access_ref(ref); 1222 gnttab_release_grant_reference(&rxq->gref_head, ref); 1223 rxq->mbufs[i] = NULL; 1224 rxq->grant_ref[i] = GRANT_REF_INVALID; 1225 m_freem(m); 1226 } 1227 } 1228 1229 static void 1230 xn_rxeof(struct netfront_rxq *rxq) 1231 { 1232 if_t ifp; 1233 struct netfront_info *np = rxq->info; 1234 #if (defined(INET) || defined(INET6)) 1235 struct lro_ctrl *lro = &rxq->lro; 1236 #endif 1237 struct netfront_rx_info rinfo; 1238 struct netif_rx_response *rx = &rinfo.rx; 1239 struct netif_extra_info *extras = rinfo.extras; 1240 RING_IDX i, rp; 1241 struct mbuf *m; 1242 struct mbufq mbufq_rxq, mbufq_errq; 1243 int err, work_to_do; 1244 1245 XN_RX_LOCK_ASSERT(rxq); 1246 1247 if (!netfront_carrier_ok(np)) 1248 return; 1249 1250 /* XXX: there should be some sane limit. */ 1251 mbufq_init(&mbufq_errq, INT_MAX); 1252 mbufq_init(&mbufq_rxq, INT_MAX); 1253 1254 ifp = np->xn_ifp; 1255 1256 do { 1257 rp = rxq->ring.sring->rsp_prod; 1258 rmb(); /* Ensure we see queued responses up to 'rp'. */ 1259 1260 i = rxq->ring.rsp_cons; 1261 while ((i != rp)) { 1262 memcpy(rx, RING_GET_RESPONSE(&rxq->ring, i), sizeof(*rx)); 1263 memset(extras, 0, sizeof(rinfo.extras)); 1264 1265 m = NULL; 1266 err = xn_get_responses(rxq, &rinfo, rp, &i, &m); 1267 1268 if (__predict_false(err)) { 1269 if (m) 1270 (void )mbufq_enqueue(&mbufq_errq, m); 1271 if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1); 1272 continue; 1273 } 1274 1275 m->m_pkthdr.rcvif = ifp; 1276 if (rx->flags & NETRXF_data_validated) { 1277 /* 1278 * According to mbuf(9) the correct way to tell 1279 * the stack that the checksum of an inbound 1280 * packet is correct, without it actually being 1281 * present (because the underlying interface 1282 * doesn't provide it), is to set the 1283 * CSUM_DATA_VALID and CSUM_PSEUDO_HDR flags, 1284 * and the csum_data field to 0xffff. 1285 */ 1286 m->m_pkthdr.csum_flags |= (CSUM_DATA_VALID 1287 | CSUM_PSEUDO_HDR); 1288 m->m_pkthdr.csum_data = 0xffff; 1289 } 1290 if ((rx->flags & NETRXF_extra_info) != 0 && 1291 (extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].type == 1292 XEN_NETIF_EXTRA_TYPE_GSO)) { 1293 m->m_pkthdr.tso_segsz = 1294 extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].u.gso.size; 1295 m->m_pkthdr.csum_flags |= CSUM_TSO; 1296 } 1297 1298 (void )mbufq_enqueue(&mbufq_rxq, m); 1299 } 1300 1301 rxq->ring.rsp_cons = i; 1302 1303 xn_alloc_rx_buffers(rxq); 1304 1305 RING_FINAL_CHECK_FOR_RESPONSES(&rxq->ring, work_to_do); 1306 } while (work_to_do); 1307 1308 mbufq_drain(&mbufq_errq); 1309 /* 1310 * Process all the mbufs after the remapping is complete. 1311 * Break the mbuf chain first though. 1312 */ 1313 while ((m = mbufq_dequeue(&mbufq_rxq)) != NULL) { 1314 if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); 1315 #if (defined(INET) || defined(INET6)) 1316 /* Use LRO if possible */ 1317 if ((if_getcapenable(ifp) & IFCAP_LRO) == 0 || 1318 lro->lro_cnt == 0 || tcp_lro_rx(lro, m, 0)) { 1319 /* 1320 * If LRO fails, pass up to the stack 1321 * directly. 1322 */ 1323 if_input(ifp, m); 1324 } 1325 #else 1326 if_input(ifp, m); 1327 #endif 1328 } 1329 1330 #if (defined(INET) || defined(INET6)) 1331 /* 1332 * Flush any outstanding LRO work 1333 */ 1334 tcp_lro_flush_all(lro); 1335 #endif 1336 } 1337 1338 static void 1339 xn_txeof(struct netfront_txq *txq) 1340 { 1341 RING_IDX i, prod; 1342 unsigned short id; 1343 if_t ifp; 1344 netif_tx_response_t *txr; 1345 struct mbuf *m; 1346 struct netfront_info *np = txq->info; 1347 1348 XN_TX_LOCK_ASSERT(txq); 1349 1350 if (!netfront_carrier_ok(np)) 1351 return; 1352 1353 ifp = np->xn_ifp; 1354 1355 do { 1356 prod = txq->ring.sring->rsp_prod; 1357 rmb(); /* Ensure we see responses up to 'rp'. */ 1358 1359 for (i = txq->ring.rsp_cons; i != prod; i++) { 1360 txr = RING_GET_RESPONSE(&txq->ring, i); 1361 if (txr->status == NETIF_RSP_NULL) 1362 continue; 1363 1364 if (txr->status != NETIF_RSP_OKAY) { 1365 printf("%s: WARNING: response is %d!\n", 1366 __func__, txr->status); 1367 } 1368 id = txr->id; 1369 m = txq->mbufs[id]; 1370 KASSERT(m != NULL, ("mbuf not found in chain")); 1371 KASSERT((uintptr_t)m > NET_TX_RING_SIZE, 1372 ("mbuf already on the free list, but we're " 1373 "trying to free it again!")); 1374 M_ASSERTVALID(m); 1375 1376 if (__predict_false(gnttab_query_foreign_access( 1377 txq->grant_ref[id]) != 0)) { 1378 panic("%s: grant id %u still in use by the " 1379 "backend", __func__, id); 1380 } 1381 gnttab_end_foreign_access_ref(txq->grant_ref[id]); 1382 gnttab_release_grant_reference( 1383 &txq->gref_head, txq->grant_ref[id]); 1384 txq->grant_ref[id] = GRANT_REF_INVALID; 1385 1386 txq->mbufs[id] = NULL; 1387 add_id_to_freelist(txq->mbufs, id); 1388 txq->mbufs_cnt--; 1389 mbuf_release(m); 1390 /* Only mark the txq active if we've freed up at least one slot to try */ 1391 if_setdrvflagbits(ifp, 0, IFF_DRV_OACTIVE); 1392 } 1393 txq->ring.rsp_cons = prod; 1394 1395 /* 1396 * Set a new event, then check for race with update of 1397 * tx_cons. Note that it is essential to schedule a 1398 * callback, no matter how few buffers are pending. Even if 1399 * there is space in the transmit ring, higher layers may 1400 * be blocked because too much data is outstanding: in such 1401 * cases notification from Xen is likely to be the only kick 1402 * that we'll get. 1403 */ 1404 txq->ring.sring->rsp_event = 1405 prod + ((txq->ring.sring->req_prod - prod) >> 1) + 1; 1406 1407 mb(); 1408 } while (prod != txq->ring.sring->rsp_prod); 1409 1410 if (txq->full && 1411 ((txq->ring.sring->req_prod - prod) < NET_TX_RING_SIZE)) { 1412 txq->full = false; 1413 xn_txq_start(txq); 1414 } 1415 } 1416 1417 static void 1418 xn_intr(void *xsc) 1419 { 1420 struct netfront_txq *txq = xsc; 1421 struct netfront_info *np = txq->info; 1422 struct netfront_rxq *rxq = &np->rxq[txq->id]; 1423 1424 /* kick both tx and rx */ 1425 xn_rxq_intr(rxq); 1426 xn_txq_intr(txq); 1427 } 1428 1429 static void 1430 xn_move_rx_slot(struct netfront_rxq *rxq, struct mbuf *m, 1431 grant_ref_t ref) 1432 { 1433 int new = xn_rxidx(rxq->ring.req_prod_pvt); 1434 1435 KASSERT(rxq->mbufs[new] == NULL, ("mbufs != NULL")); 1436 rxq->mbufs[new] = m; 1437 rxq->grant_ref[new] = ref; 1438 RING_GET_REQUEST(&rxq->ring, rxq->ring.req_prod_pvt)->id = new; 1439 RING_GET_REQUEST(&rxq->ring, rxq->ring.req_prod_pvt)->gref = ref; 1440 rxq->ring.req_prod_pvt++; 1441 } 1442 1443 static int 1444 xn_get_extras(struct netfront_rxq *rxq, 1445 struct netif_extra_info *extras, RING_IDX rp, RING_IDX *cons) 1446 { 1447 struct netif_extra_info *extra; 1448 1449 int err = 0; 1450 1451 do { 1452 struct mbuf *m; 1453 grant_ref_t ref; 1454 1455 if (__predict_false(*cons + 1 == rp)) { 1456 err = EINVAL; 1457 break; 1458 } 1459 1460 extra = (struct netif_extra_info *) 1461 RING_GET_RESPONSE(&rxq->ring, ++(*cons)); 1462 1463 if (__predict_false(!extra->type || 1464 extra->type >= XEN_NETIF_EXTRA_TYPE_MAX)) { 1465 err = EINVAL; 1466 } else { 1467 memcpy(&extras[extra->type - 1], extra, sizeof(*extra)); 1468 } 1469 1470 m = xn_get_rx_mbuf(rxq, *cons); 1471 ref = xn_get_rx_ref(rxq, *cons); 1472 xn_move_rx_slot(rxq, m, ref); 1473 } while (extra->flags & XEN_NETIF_EXTRA_FLAG_MORE); 1474 1475 return err; 1476 } 1477 1478 static int 1479 xn_get_responses(struct netfront_rxq *rxq, 1480 struct netfront_rx_info *rinfo, RING_IDX rp, RING_IDX *cons, 1481 struct mbuf **list) 1482 { 1483 struct netif_rx_response *rx = &rinfo->rx; 1484 struct netif_extra_info *extras = rinfo->extras; 1485 struct mbuf *m, *m0, *m_prev; 1486 grant_ref_t ref = xn_get_rx_ref(rxq, *cons); 1487 int frags = 1; 1488 int err = 0; 1489 u_long ret __diagused; 1490 1491 m0 = m = m_prev = xn_get_rx_mbuf(rxq, *cons); 1492 1493 if (rx->flags & NETRXF_extra_info) { 1494 err = xn_get_extras(rxq, extras, rp, cons); 1495 } 1496 1497 if (m0 != NULL) { 1498 m0->m_pkthdr.len = 0; 1499 m0->m_next = NULL; 1500 } 1501 1502 for (;;) { 1503 #if 0 1504 DPRINTK("rx->status=%hd rx->offset=%hu frags=%u\n", 1505 rx->status, rx->offset, frags); 1506 #endif 1507 if (__predict_false(rx->status < 0 || 1508 rx->offset + rx->status > PAGE_SIZE)) { 1509 xn_move_rx_slot(rxq, m, ref); 1510 if (m0 == m) 1511 m0 = NULL; 1512 m = NULL; 1513 err = EINVAL; 1514 goto next_skip_queue; 1515 } 1516 1517 /* 1518 * This definitely indicates a bug, either in this driver or in 1519 * the backend driver. In future this should flag the bad 1520 * situation to the system controller to reboot the backed. 1521 */ 1522 if (ref == GRANT_REF_INVALID) { 1523 printf("%s: Bad rx response id %d.\n", __func__, rx->id); 1524 err = EINVAL; 1525 goto next; 1526 } 1527 1528 ret = gnttab_end_foreign_access_ref(ref); 1529 KASSERT(ret, ("Unable to end access to grant references")); 1530 1531 gnttab_release_grant_reference(&rxq->gref_head, ref); 1532 1533 next: 1534 if (m == NULL) 1535 break; 1536 1537 m->m_len = rx->status; 1538 m->m_data += rx->offset; 1539 m0->m_pkthdr.len += rx->status; 1540 1541 next_skip_queue: 1542 if (!(rx->flags & NETRXF_more_data)) 1543 break; 1544 1545 if (*cons + frags == rp) { 1546 if (net_ratelimit()) 1547 WPRINTK("Need more frags\n"); 1548 err = ENOENT; 1549 printf("%s: cons %u frags %u rp %u, not enough frags\n", 1550 __func__, *cons, frags, rp); 1551 break; 1552 } 1553 /* 1554 * Note that m can be NULL, if rx->status < 0 or if 1555 * rx->offset + rx->status > PAGE_SIZE above. 1556 */ 1557 m_prev = m; 1558 1559 rx = RING_GET_RESPONSE(&rxq->ring, *cons + frags); 1560 m = xn_get_rx_mbuf(rxq, *cons + frags); 1561 1562 /* 1563 * m_prev == NULL can happen if rx->status < 0 or if 1564 * rx->offset + * rx->status > PAGE_SIZE above. 1565 */ 1566 if (m_prev != NULL) 1567 m_prev->m_next = m; 1568 1569 /* 1570 * m0 can be NULL if rx->status < 0 or if * rx->offset + 1571 * rx->status > PAGE_SIZE above. 1572 */ 1573 if (m0 == NULL) 1574 m0 = m; 1575 m->m_next = NULL; 1576 ref = xn_get_rx_ref(rxq, *cons + frags); 1577 frags++; 1578 } 1579 *list = m0; 1580 *cons += frags; 1581 1582 return (err); 1583 } 1584 1585 /** 1586 * Given an mbuf chain, make sure we have enough room and then push 1587 * it onto the transmit ring. 1588 */ 1589 static int 1590 xn_assemble_tx_request(struct netfront_txq *txq, struct mbuf *m_head) 1591 { 1592 struct netfront_info *np = txq->info; 1593 if_t ifp = np->xn_ifp; 1594 int otherend_id, error, nfrags; 1595 bus_dma_segment_t *segs = txq->segs; 1596 struct mbuf_xennet *tag; 1597 bus_dmamap_t map; 1598 unsigned int i; 1599 1600 KASSERT(!SLIST_EMPTY(&txq->tags), ("no tags available")); 1601 tag = SLIST_FIRST(&txq->tags); 1602 SLIST_REMOVE_HEAD(&txq->tags, next); 1603 KASSERT(tag->count == 0, ("tag already in-use")); 1604 map = tag->dma_map; 1605 error = bus_dmamap_load_mbuf_sg(np->dma_tag, map, m_head, segs, 1606 &nfrags, 0); 1607 if (error == EFBIG || nfrags > np->maxfrags) { 1608 struct mbuf *m; 1609 1610 bus_dmamap_unload(np->dma_tag, map); 1611 m = m_defrag(m_head, M_NOWAIT); 1612 if (!m) { 1613 /* 1614 * Defrag failed, so free the mbuf and 1615 * therefore drop the packet. 1616 */ 1617 SLIST_INSERT_HEAD(&txq->tags, tag, next); 1618 m_freem(m_head); 1619 return (EMSGSIZE); 1620 } 1621 m_head = m; 1622 error = bus_dmamap_load_mbuf_sg(np->dma_tag, map, m_head, segs, 1623 &nfrags, 0); 1624 if (error != 0 || nfrags > np->maxfrags) { 1625 bus_dmamap_unload(np->dma_tag, map); 1626 SLIST_INSERT_HEAD(&txq->tags, tag, next); 1627 m_freem(m_head); 1628 return (error ?: EFBIG); 1629 } 1630 } else if (error != 0) { 1631 SLIST_INSERT_HEAD(&txq->tags, tag, next); 1632 m_freem(m_head); 1633 return (error); 1634 } 1635 1636 /** 1637 * The FreeBSD TCP stack, with TSO enabled, can produce a chain 1638 * of mbufs longer than Linux can handle. Make sure we don't 1639 * pass a too-long chain over to the other side by dropping the 1640 * packet. It doesn't look like there is currently a way to 1641 * tell the TCP stack to generate a shorter chain of packets. 1642 */ 1643 if (nfrags > MAX_TX_REQ_FRAGS) { 1644 #ifdef DEBUG 1645 printf("%s: nfrags %d > MAX_TX_REQ_FRAGS %d, netback " 1646 "won't be able to handle it, dropping\n", 1647 __func__, nfrags, MAX_TX_REQ_FRAGS); 1648 #endif 1649 SLIST_INSERT_HEAD(&txq->tags, tag, next); 1650 bus_dmamap_unload(np->dma_tag, map); 1651 m_freem(m_head); 1652 return (EMSGSIZE); 1653 } 1654 1655 /* 1656 * This check should be redundant. We've already verified that we 1657 * have enough slots in the ring to handle a packet of maximum 1658 * size, and that our packet is less than the maximum size. Keep 1659 * it in here as an assert for now just to make certain that 1660 * chain_cnt is accurate. 1661 */ 1662 KASSERT((txq->mbufs_cnt + nfrags) <= NET_TX_RING_SIZE, 1663 ("%s: chain_cnt (%d) + nfrags (%d) > NET_TX_RING_SIZE " 1664 "(%d)!", __func__, (int) txq->mbufs_cnt, 1665 (int) nfrags, (int) NET_TX_RING_SIZE)); 1666 1667 /* 1668 * Start packing the mbufs in this chain into 1669 * the fragment pointers. Stop when we run out 1670 * of fragments or hit the end of the mbuf chain. 1671 */ 1672 otherend_id = xenbus_get_otherend_id(np->xbdev); 1673 m_tag_prepend(m_head, &tag->tag); 1674 for (i = 0; i < nfrags; i++) { 1675 netif_tx_request_t *tx; 1676 uintptr_t id; 1677 grant_ref_t ref; 1678 u_long mfn; /* XXX Wrong type? */ 1679 1680 tx = RING_GET_REQUEST(&txq->ring, txq->ring.req_prod_pvt); 1681 id = get_id_from_freelist(txq->mbufs); 1682 if (id == 0) 1683 panic("%s: was allocated the freelist head!\n", 1684 __func__); 1685 txq->mbufs_cnt++; 1686 if (txq->mbufs_cnt > NET_TX_RING_SIZE) 1687 panic("%s: tx_chain_cnt must be <= NET_TX_RING_SIZE\n", 1688 __func__); 1689 mbuf_grab(m_head); 1690 txq->mbufs[id] = m_head; 1691 tx->id = id; 1692 ref = gnttab_claim_grant_reference(&txq->gref_head); 1693 KASSERT((short)ref >= 0, ("Negative ref")); 1694 mfn = atop(segs[i].ds_addr); 1695 gnttab_grant_foreign_access_ref(ref, otherend_id, 1696 mfn, GNTMAP_readonly); 1697 tx->gref = txq->grant_ref[id] = ref; 1698 tx->offset = segs[i].ds_addr & PAGE_MASK; 1699 KASSERT(tx->offset + segs[i].ds_len <= PAGE_SIZE, 1700 ("mbuf segment crosses a page boundary")); 1701 tx->flags = 0; 1702 if (i == 0) { 1703 /* 1704 * The first fragment has the entire packet 1705 * size, subsequent fragments have just the 1706 * fragment size. The backend works out the 1707 * true size of the first fragment by 1708 * subtracting the sizes of the other 1709 * fragments. 1710 */ 1711 tx->size = m_head->m_pkthdr.len; 1712 1713 /* 1714 * The first fragment contains the checksum flags 1715 * and is optionally followed by extra data for 1716 * TSO etc. 1717 */ 1718 /** 1719 * CSUM_TSO requires checksum offloading. 1720 * Some versions of FreeBSD fail to 1721 * set CSUM_TCP in the CSUM_TSO case, 1722 * so we have to test for CSUM_TSO 1723 * explicitly. 1724 */ 1725 if (m_head->m_pkthdr.csum_flags 1726 & (CSUM_DELAY_DATA | CSUM_TSO)) { 1727 tx->flags |= (NETTXF_csum_blank 1728 | NETTXF_data_validated); 1729 } 1730 if (m_head->m_pkthdr.csum_flags & CSUM_TSO) { 1731 struct netif_extra_info *gso = 1732 (struct netif_extra_info *) 1733 RING_GET_REQUEST(&txq->ring, 1734 ++txq->ring.req_prod_pvt); 1735 1736 tx->flags |= NETTXF_extra_info; 1737 1738 gso->u.gso.size = m_head->m_pkthdr.tso_segsz; 1739 gso->u.gso.type = 1740 XEN_NETIF_GSO_TYPE_TCPV4; 1741 gso->u.gso.pad = 0; 1742 gso->u.gso.features = 0; 1743 1744 gso->type = XEN_NETIF_EXTRA_TYPE_GSO; 1745 gso->flags = 0; 1746 } 1747 } else { 1748 tx->size = segs[i].ds_len; 1749 } 1750 if (i != nfrags - 1) 1751 tx->flags |= NETTXF_more_data; 1752 1753 txq->ring.req_prod_pvt++; 1754 } 1755 bus_dmamap_sync(np->dma_tag, map, BUS_DMASYNC_PREWRITE); 1756 BPF_MTAP(ifp, m_head); 1757 1758 if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); 1759 if_inc_counter(ifp, IFCOUNTER_OBYTES, m_head->m_pkthdr.len); 1760 if (m_head->m_flags & M_MCAST) 1761 if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1); 1762 1763 xn_txeof(txq); 1764 1765 return (0); 1766 } 1767 1768 /* equivalent of network_open() in Linux */ 1769 static void 1770 xn_ifinit_locked(struct netfront_info *np) 1771 { 1772 if_t ifp; 1773 int i; 1774 struct netfront_rxq *rxq; 1775 1776 XN_LOCK_ASSERT(np); 1777 1778 ifp = np->xn_ifp; 1779 1780 if (if_getdrvflags(ifp) & IFF_DRV_RUNNING || !netfront_carrier_ok(np)) 1781 return; 1782 1783 xn_stop(np); 1784 1785 for (i = 0; i < np->num_queues; i++) { 1786 rxq = &np->rxq[i]; 1787 XN_RX_LOCK(rxq); 1788 xn_alloc_rx_buffers(rxq); 1789 rxq->ring.sring->rsp_event = rxq->ring.rsp_cons + 1; 1790 if (RING_HAS_UNCONSUMED_RESPONSES(&rxq->ring)) 1791 xn_rxeof(rxq); 1792 XN_RX_UNLOCK(rxq); 1793 } 1794 1795 if_setdrvflagbits(ifp, IFF_DRV_RUNNING, 0); 1796 if_setdrvflagbits(ifp, 0, IFF_DRV_OACTIVE); 1797 if_link_state_change(ifp, LINK_STATE_UP); 1798 } 1799 1800 static void 1801 xn_ifinit(void *xsc) 1802 { 1803 struct netfront_info *sc = xsc; 1804 1805 XN_LOCK(sc); 1806 xn_ifinit_locked(sc); 1807 XN_UNLOCK(sc); 1808 } 1809 1810 static int 1811 xn_ioctl(if_t ifp, u_long cmd, caddr_t data) 1812 { 1813 struct netfront_info *sc = if_getsoftc(ifp); 1814 struct ifreq *ifr = (struct ifreq *) data; 1815 device_t dev; 1816 #ifdef INET 1817 struct ifaddr *ifa = (struct ifaddr *)data; 1818 #endif 1819 int mask, error = 0, reinit; 1820 1821 dev = sc->xbdev; 1822 1823 switch(cmd) { 1824 case SIOCSIFADDR: 1825 #ifdef INET 1826 XN_LOCK(sc); 1827 if (ifa->ifa_addr->sa_family == AF_INET) { 1828 if_setflagbits(ifp, IFF_UP, 0); 1829 if (!(if_getdrvflags(ifp) & IFF_DRV_RUNNING)) 1830 xn_ifinit_locked(sc); 1831 arp_ifinit(ifp, ifa); 1832 XN_UNLOCK(sc); 1833 } else { 1834 XN_UNLOCK(sc); 1835 #endif 1836 error = ether_ioctl(ifp, cmd, data); 1837 #ifdef INET 1838 } 1839 #endif 1840 break; 1841 case SIOCSIFMTU: 1842 if (if_getmtu(ifp) == ifr->ifr_mtu) 1843 break; 1844 1845 if_setmtu(ifp, ifr->ifr_mtu); 1846 if_setdrvflagbits(ifp, 0, IFF_DRV_RUNNING); 1847 xn_ifinit(sc); 1848 break; 1849 case SIOCSIFFLAGS: 1850 XN_LOCK(sc); 1851 if (if_getflags(ifp) & IFF_UP) { 1852 /* 1853 * If only the state of the PROMISC flag changed, 1854 * then just use the 'set promisc mode' command 1855 * instead of reinitializing the entire NIC. Doing 1856 * a full re-init means reloading the firmware and 1857 * waiting for it to start up, which may take a 1858 * second or two. 1859 */ 1860 xn_ifinit_locked(sc); 1861 } else { 1862 if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) { 1863 xn_stop(sc); 1864 } 1865 } 1866 sc->xn_if_flags = if_getflags(ifp); 1867 XN_UNLOCK(sc); 1868 break; 1869 case SIOCSIFCAP: 1870 mask = ifr->ifr_reqcap ^ if_getcapenable(ifp); 1871 reinit = 0; 1872 1873 if (mask & IFCAP_TXCSUM) { 1874 if_togglecapenable(ifp, IFCAP_TXCSUM); 1875 if_togglehwassist(ifp, XN_CSUM_FEATURES); 1876 } 1877 if (mask & IFCAP_TSO4) { 1878 if_togglecapenable(ifp, IFCAP_TSO4); 1879 if_togglehwassist(ifp, CSUM_TSO); 1880 } 1881 1882 if (mask & (IFCAP_RXCSUM | IFCAP_LRO)) { 1883 /* These Rx features require us to renegotiate. */ 1884 reinit = 1; 1885 1886 if (mask & IFCAP_RXCSUM) 1887 if_togglecapenable(ifp, IFCAP_RXCSUM); 1888 if (mask & IFCAP_LRO) 1889 if_togglecapenable(ifp, IFCAP_LRO); 1890 } 1891 1892 if (reinit == 0) 1893 break; 1894 1895 /* 1896 * We must reset the interface so the backend picks up the 1897 * new features. 1898 */ 1899 device_printf(sc->xbdev, 1900 "performing interface reset due to feature change\n"); 1901 XN_LOCK(sc); 1902 netfront_carrier_off(sc); 1903 sc->xn_reset = true; 1904 /* 1905 * NB: the pending packet queue is not flushed, since 1906 * the interface should still support the old options. 1907 */ 1908 XN_UNLOCK(sc); 1909 /* 1910 * Delete the xenstore nodes that export features. 1911 * 1912 * NB: There's a xenbus state called 1913 * "XenbusStateReconfiguring", which is what we should set 1914 * here. Sadly none of the backends know how to handle it, 1915 * and simply disconnect from the frontend, so we will just 1916 * switch back to XenbusStateInitialising in order to force 1917 * a reconnection. 1918 */ 1919 xs_rm(XST_NIL, xenbus_get_node(dev), "feature-gso-tcpv4"); 1920 xs_rm(XST_NIL, xenbus_get_node(dev), "feature-no-csum-offload"); 1921 xenbus_set_state(dev, XenbusStateClosing); 1922 1923 /* 1924 * Wait for the frontend to reconnect before returning 1925 * from the ioctl. 30s should be more than enough for any 1926 * sane backend to reconnect. 1927 */ 1928 error = tsleep(sc, 0, "xn_rst", 30*hz); 1929 break; 1930 case SIOCADDMULTI: 1931 case SIOCDELMULTI: 1932 break; 1933 case SIOCSIFMEDIA: 1934 case SIOCGIFMEDIA: 1935 error = ifmedia_ioctl(ifp, ifr, &sc->sc_media, cmd); 1936 break; 1937 default: 1938 error = ether_ioctl(ifp, cmd, data); 1939 } 1940 1941 return (error); 1942 } 1943 1944 static void 1945 xn_stop(struct netfront_info *sc) 1946 { 1947 if_t ifp; 1948 1949 XN_LOCK_ASSERT(sc); 1950 1951 ifp = sc->xn_ifp; 1952 1953 if_setdrvflagbits(ifp, 0, IFF_DRV_RUNNING | IFF_DRV_OACTIVE); 1954 if_link_state_change(ifp, LINK_STATE_DOWN); 1955 } 1956 1957 static void 1958 xn_rebuild_rx_bufs(struct netfront_rxq *rxq) 1959 { 1960 int requeue_idx, i; 1961 grant_ref_t ref; 1962 netif_rx_request_t *req; 1963 1964 for (requeue_idx = 0, i = 0; i < NET_RX_RING_SIZE; i++) { 1965 struct mbuf *m; 1966 u_long pfn; 1967 1968 if (rxq->mbufs[i] == NULL) 1969 continue; 1970 1971 m = rxq->mbufs[requeue_idx] = xn_get_rx_mbuf(rxq, i); 1972 ref = rxq->grant_ref[requeue_idx] = xn_get_rx_ref(rxq, i); 1973 1974 req = RING_GET_REQUEST(&rxq->ring, requeue_idx); 1975 pfn = vtophys(mtod(m, vm_offset_t)) >> PAGE_SHIFT; 1976 1977 gnttab_grant_foreign_access_ref(ref, 1978 xenbus_get_otherend_id(rxq->info->xbdev), 1979 pfn, 0); 1980 1981 req->gref = ref; 1982 req->id = requeue_idx; 1983 1984 requeue_idx++; 1985 } 1986 1987 rxq->ring.req_prod_pvt = requeue_idx; 1988 } 1989 1990 /* START of Xenolinux helper functions adapted to FreeBSD */ 1991 static int 1992 xn_connect(struct netfront_info *np) 1993 { 1994 int i, error; 1995 u_int feature_rx_copy; 1996 struct netfront_rxq *rxq; 1997 struct netfront_txq *txq; 1998 1999 error = xs_scanf(XST_NIL, xenbus_get_otherend_path(np->xbdev), 2000 "feature-rx-copy", NULL, "%u", &feature_rx_copy); 2001 if (error != 0) 2002 feature_rx_copy = 0; 2003 2004 /* We only support rx copy. */ 2005 if (!feature_rx_copy) 2006 return (EPROTONOSUPPORT); 2007 2008 /* Recovery procedure: */ 2009 error = talk_to_backend(np->xbdev, np); 2010 if (error != 0) 2011 return (error); 2012 2013 /* Step 1: Reinitialise variables. */ 2014 xn_query_features(np); 2015 xn_configure_features(np); 2016 2017 /* Step 2: Release TX buffer */ 2018 for (i = 0; i < np->num_queues; i++) { 2019 txq = &np->txq[i]; 2020 xn_release_tx_bufs(txq); 2021 } 2022 2023 /* Step 3: Rebuild the RX buffer freelist and the RX ring itself. */ 2024 for (i = 0; i < np->num_queues; i++) { 2025 rxq = &np->rxq[i]; 2026 xn_rebuild_rx_bufs(rxq); 2027 } 2028 2029 /* Step 4: All public and private state should now be sane. Get 2030 * ready to start sending and receiving packets and give the driver 2031 * domain a kick because we've probably just requeued some 2032 * packets. 2033 */ 2034 netfront_carrier_on(np); 2035 wakeup(np); 2036 2037 return (0); 2038 } 2039 2040 static void 2041 xn_kick_rings(struct netfront_info *np) 2042 { 2043 struct netfront_rxq *rxq; 2044 struct netfront_txq *txq; 2045 int i; 2046 2047 for (i = 0; i < np->num_queues; i++) { 2048 txq = &np->txq[i]; 2049 rxq = &np->rxq[i]; 2050 xen_intr_signal(txq->xen_intr_handle); 2051 XN_TX_LOCK(txq); 2052 xn_txeof(txq); 2053 XN_TX_UNLOCK(txq); 2054 XN_RX_LOCK(rxq); 2055 xn_alloc_rx_buffers(rxq); 2056 XN_RX_UNLOCK(rxq); 2057 } 2058 } 2059 2060 static void 2061 xn_query_features(struct netfront_info *np) 2062 { 2063 int val; 2064 2065 device_printf(np->xbdev, "backend features:"); 2066 2067 if (xs_scanf(XST_NIL, xenbus_get_otherend_path(np->xbdev), 2068 "feature-sg", NULL, "%d", &val) != 0) 2069 val = 0; 2070 2071 np->maxfrags = 1; 2072 if (val) { 2073 np->maxfrags = MAX_TX_REQ_FRAGS; 2074 printf(" feature-sg"); 2075 } 2076 2077 if (xs_scanf(XST_NIL, xenbus_get_otherend_path(np->xbdev), 2078 "feature-gso-tcpv4", NULL, "%d", &val) != 0) 2079 val = 0; 2080 2081 if_setcapabilitiesbit(np->xn_ifp, 0, IFCAP_TSO4 | IFCAP_LRO); 2082 if (val) { 2083 if_setcapabilitiesbit(np->xn_ifp, IFCAP_TSO4 | IFCAP_LRO, 0); 2084 printf(" feature-gso-tcp4"); 2085 } 2086 2087 /* 2088 * HW CSUM offload is assumed to be available unless 2089 * feature-no-csum-offload is set in xenstore. 2090 */ 2091 if (xs_scanf(XST_NIL, xenbus_get_otherend_path(np->xbdev), 2092 "feature-no-csum-offload", NULL, "%d", &val) != 0) 2093 val = 0; 2094 2095 if_setcapabilitiesbit(np->xn_ifp, IFCAP_HWCSUM, 0); 2096 if (val) { 2097 if_setcapabilitiesbit(np->xn_ifp, 0, IFCAP_HWCSUM); 2098 printf(" feature-no-csum-offload"); 2099 } 2100 2101 printf("\n"); 2102 } 2103 2104 static int 2105 xn_configure_features(struct netfront_info *np) 2106 { 2107 int err, cap_enabled; 2108 #if (defined(INET) || defined(INET6)) 2109 int i; 2110 #endif 2111 if_t ifp; 2112 2113 ifp = np->xn_ifp; 2114 err = 0; 2115 2116 if ((if_getcapenable(ifp) & if_getcapabilities(ifp)) == if_getcapenable(ifp)) { 2117 /* Current options are available, no need to do anything. */ 2118 return (0); 2119 } 2120 2121 /* Try to preserve as many options as possible. */ 2122 cap_enabled = if_getcapenable(ifp); 2123 if_setcapenable(ifp, 0); 2124 if_sethwassist(ifp, 0); 2125 2126 #if (defined(INET) || defined(INET6)) 2127 if ((cap_enabled & IFCAP_LRO) != 0) 2128 for (i = 0; i < np->num_queues; i++) 2129 tcp_lro_free(&np->rxq[i].lro); 2130 if (xn_enable_lro && 2131 (if_getcapabilities(ifp) & cap_enabled & IFCAP_LRO) != 0) { 2132 if_setcapenablebit(ifp, IFCAP_LRO, 0); 2133 for (i = 0; i < np->num_queues; i++) { 2134 err = tcp_lro_init(&np->rxq[i].lro); 2135 if (err != 0) { 2136 device_printf(np->xbdev, 2137 "LRO initialization failed\n"); 2138 if_setcapenablebit(ifp, 0, IFCAP_LRO); 2139 break; 2140 } 2141 np->rxq[i].lro.ifp = ifp; 2142 } 2143 } 2144 if ((if_getcapabilities(ifp) & cap_enabled & IFCAP_TSO4) != 0) { 2145 if_setcapenablebit(ifp, IFCAP_TSO4, 0); 2146 if_sethwassistbits(ifp, CSUM_TSO, 0); 2147 } 2148 #endif 2149 if ((if_getcapabilities(ifp) & cap_enabled & IFCAP_TXCSUM) != 0) { 2150 if_setcapenablebit(ifp, IFCAP_TXCSUM, 0); 2151 if_sethwassistbits(ifp, XN_CSUM_FEATURES, 0); 2152 } 2153 if ((if_getcapabilities(ifp) & cap_enabled & IFCAP_RXCSUM) != 0) 2154 if_setcapenablebit(ifp, IFCAP_RXCSUM, 0); 2155 2156 return (err); 2157 } 2158 2159 static int 2160 xn_txq_mq_start_locked(struct netfront_txq *txq, struct mbuf *m) 2161 { 2162 struct netfront_info *np; 2163 if_t ifp; 2164 struct buf_ring *br; 2165 int error, notify; 2166 2167 np = txq->info; 2168 br = txq->br; 2169 ifp = np->xn_ifp; 2170 error = 0; 2171 2172 XN_TX_LOCK_ASSERT(txq); 2173 2174 if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0 || 2175 !netfront_carrier_ok(np)) { 2176 if (m != NULL) 2177 error = drbr_enqueue(ifp, br, m); 2178 return (error); 2179 } 2180 2181 if (m != NULL) { 2182 error = drbr_enqueue(ifp, br, m); 2183 if (error != 0) 2184 return (error); 2185 } 2186 2187 while ((m = drbr_peek(ifp, br)) != NULL) { 2188 if (!xn_tx_slot_available(txq)) { 2189 drbr_putback(ifp, br, m); 2190 break; 2191 } 2192 2193 error = xn_assemble_tx_request(txq, m); 2194 /* xn_assemble_tx_request always consumes the mbuf*/ 2195 if (error != 0) { 2196 drbr_advance(ifp, br); 2197 break; 2198 } 2199 2200 RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&txq->ring, notify); 2201 if (notify) 2202 xen_intr_signal(txq->xen_intr_handle); 2203 2204 drbr_advance(ifp, br); 2205 } 2206 2207 if (RING_FULL(&txq->ring)) 2208 txq->full = true; 2209 2210 return (0); 2211 } 2212 2213 static int 2214 xn_txq_mq_start(if_t ifp, struct mbuf *m) 2215 { 2216 struct netfront_info *np; 2217 struct netfront_txq *txq; 2218 int i, npairs, error; 2219 2220 np = if_getsoftc(ifp); 2221 npairs = np->num_queues; 2222 2223 if (!netfront_carrier_ok(np)) 2224 return (ENOBUFS); 2225 2226 KASSERT(npairs != 0, ("called with 0 available queues")); 2227 2228 /* check if flowid is set */ 2229 if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) 2230 i = m->m_pkthdr.flowid % npairs; 2231 else 2232 i = curcpu % npairs; 2233 2234 txq = &np->txq[i]; 2235 2236 if (XN_TX_TRYLOCK(txq) != 0) { 2237 error = xn_txq_mq_start_locked(txq, m); 2238 XN_TX_UNLOCK(txq); 2239 } else { 2240 error = drbr_enqueue(ifp, txq->br, m); 2241 taskqueue_enqueue(txq->tq, &txq->defrtask); 2242 } 2243 2244 return (error); 2245 } 2246 2247 static void 2248 xn_qflush(if_t ifp) 2249 { 2250 struct netfront_info *np; 2251 struct netfront_txq *txq; 2252 struct mbuf *m; 2253 int i; 2254 2255 np = if_getsoftc(ifp); 2256 2257 for (i = 0; i < np->num_queues; i++) { 2258 txq = &np->txq[i]; 2259 2260 XN_TX_LOCK(txq); 2261 while ((m = buf_ring_dequeue_sc(txq->br)) != NULL) 2262 m_freem(m); 2263 XN_TX_UNLOCK(txq); 2264 } 2265 2266 if_qflush(ifp); 2267 } 2268 2269 /** 2270 * Create a network device. 2271 * @param dev Newbus device representing this virtual NIC. 2272 */ 2273 int 2274 create_netdev(device_t dev) 2275 { 2276 struct netfront_info *np; 2277 int err; 2278 if_t ifp; 2279 2280 np = device_get_softc(dev); 2281 2282 np->xbdev = dev; 2283 2284 mtx_init(&np->sc_lock, "xnsc", "netfront softc lock", MTX_DEF); 2285 2286 ifmedia_init(&np->sc_media, 0, xn_ifmedia_upd, xn_ifmedia_sts); 2287 ifmedia_add(&np->sc_media, IFM_ETHER|IFM_MANUAL, 0, NULL); 2288 ifmedia_set(&np->sc_media, IFM_ETHER|IFM_MANUAL); 2289 2290 err = xen_net_read_mac(dev, np->mac); 2291 if (err != 0) 2292 goto error; 2293 2294 /* Set up ifnet structure */ 2295 ifp = np->xn_ifp = if_alloc(IFT_ETHER); 2296 if_setsoftc(ifp, np); 2297 if_initname(ifp, "xn", device_get_unit(dev)); 2298 if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST); 2299 if_setioctlfn(ifp, xn_ioctl); 2300 2301 if_settransmitfn(ifp, xn_txq_mq_start); 2302 if_setqflushfn(ifp, xn_qflush); 2303 2304 if_setinitfn(ifp, xn_ifinit); 2305 2306 if_sethwassist(ifp, XN_CSUM_FEATURES); 2307 /* Enable all supported features at device creation. */ 2308 if_setcapabilities(ifp, IFCAP_HWCSUM|IFCAP_TSO4|IFCAP_LRO); 2309 if_setcapenable(ifp, if_getcapabilities(ifp)); 2310 2311 if_sethwtsomax(ifp, 65536 - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN)); 2312 if_sethwtsomaxsegcount(ifp, MAX_TX_REQ_FRAGS); 2313 if_sethwtsomaxsegsize(ifp, PAGE_SIZE); 2314 2315 ether_ifattach(ifp, np->mac); 2316 netfront_carrier_off(np); 2317 2318 err = bus_dma_tag_create( 2319 bus_get_dma_tag(dev), /* parent */ 2320 1, PAGE_SIZE, /* algnmnt, boundary */ 2321 BUS_SPACE_MAXADDR, /* lowaddr */ 2322 BUS_SPACE_MAXADDR, /* highaddr */ 2323 NULL, NULL, /* filter, filterarg */ 2324 PAGE_SIZE * MAX_TX_REQ_FRAGS, /* max request size */ 2325 MAX_TX_REQ_FRAGS, /* max segments */ 2326 PAGE_SIZE, /* maxsegsize */ 2327 BUS_DMA_ALLOCNOW, /* flags */ 2328 NULL, NULL, /* lockfunc, lockarg */ 2329 &np->dma_tag); 2330 2331 return (err); 2332 2333 error: 2334 KASSERT(err != 0, ("Error path with no error code specified")); 2335 return (err); 2336 } 2337 2338 static int 2339 netfront_detach(device_t dev) 2340 { 2341 struct netfront_info *info = device_get_softc(dev); 2342 2343 DPRINTK("%s\n", xenbus_get_node(dev)); 2344 2345 netif_free(info); 2346 2347 return 0; 2348 } 2349 2350 static void 2351 netif_free(struct netfront_info *np) 2352 { 2353 2354 XN_LOCK(np); 2355 xn_stop(np); 2356 XN_UNLOCK(np); 2357 netif_disconnect_backend(np); 2358 ether_ifdetach(np->xn_ifp); 2359 free(np->rxq, M_DEVBUF); 2360 free(np->txq, M_DEVBUF); 2361 if_free(np->xn_ifp); 2362 np->xn_ifp = NULL; 2363 ifmedia_removeall(&np->sc_media); 2364 bus_dma_tag_destroy(np->dma_tag); 2365 } 2366 2367 static void 2368 netif_disconnect_backend(struct netfront_info *np) 2369 { 2370 u_int i; 2371 2372 for (i = 0; i < np->num_queues; i++) { 2373 XN_RX_LOCK(&np->rxq[i]); 2374 XN_TX_LOCK(&np->txq[i]); 2375 } 2376 netfront_carrier_off(np); 2377 for (i = 0; i < np->num_queues; i++) { 2378 XN_RX_UNLOCK(&np->rxq[i]); 2379 XN_TX_UNLOCK(&np->txq[i]); 2380 } 2381 2382 for (i = 0; i < np->num_queues; i++) { 2383 disconnect_rxq(&np->rxq[i]); 2384 disconnect_txq(&np->txq[i]); 2385 } 2386 } 2387 2388 static int 2389 xn_ifmedia_upd(if_t ifp) 2390 { 2391 2392 return (0); 2393 } 2394 2395 static void 2396 xn_ifmedia_sts(if_t ifp, struct ifmediareq *ifmr) 2397 { 2398 2399 ifmr->ifm_status = IFM_AVALID|IFM_ACTIVE; 2400 ifmr->ifm_active = IFM_ETHER|IFM_MANUAL; 2401 } 2402 2403 /* ** Driver registration ** */ 2404 static device_method_t netfront_methods[] = { 2405 /* Device interface */ 2406 DEVMETHOD(device_probe, netfront_probe), 2407 DEVMETHOD(device_attach, netfront_attach), 2408 DEVMETHOD(device_detach, netfront_detach), 2409 DEVMETHOD(device_shutdown, bus_generic_shutdown), 2410 DEVMETHOD(device_suspend, netfront_suspend), 2411 DEVMETHOD(device_resume, netfront_resume), 2412 2413 /* Xenbus interface */ 2414 DEVMETHOD(xenbus_otherend_changed, netfront_backend_changed), 2415 2416 DEVMETHOD_END 2417 }; 2418 2419 static driver_t netfront_driver = { 2420 "xn", 2421 netfront_methods, 2422 sizeof(struct netfront_info), 2423 }; 2424 2425 DRIVER_MODULE(xe, xenbusb_front, netfront_driver, NULL, NULL); 2426