1 /*- 2 * Copyright (c) 2004-2006 Kip Macy 3 * Copyright (c) 2015 Wei Liu <wei.liu2@citrix.com> 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28 #include <sys/cdefs.h> 29 __FBSDID("$FreeBSD$"); 30 31 #include "opt_inet.h" 32 #include "opt_inet6.h" 33 34 #include <sys/param.h> 35 #include <sys/sockio.h> 36 #include <sys/limits.h> 37 #include <sys/mbuf.h> 38 #include <sys/malloc.h> 39 #include <sys/module.h> 40 #include <sys/kernel.h> 41 #include <sys/socket.h> 42 #include <sys/sysctl.h> 43 #include <sys/taskqueue.h> 44 45 #include <net/if.h> 46 #include <net/if_var.h> 47 #include <net/if_arp.h> 48 #include <net/ethernet.h> 49 #include <net/if_media.h> 50 #include <net/bpf.h> 51 #include <net/if_types.h> 52 53 #include <netinet/in.h> 54 #include <netinet/ip.h> 55 #include <netinet/if_ether.h> 56 #include <netinet/tcp.h> 57 #include <netinet/tcp_lro.h> 58 59 #include <vm/vm.h> 60 #include <vm/pmap.h> 61 62 #include <sys/bus.h> 63 64 #include <xen/xen-os.h> 65 #include <xen/hypervisor.h> 66 #include <xen/xen_intr.h> 67 #include <xen/gnttab.h> 68 #include <xen/interface/memory.h> 69 #include <xen/interface/io/netif.h> 70 #include <xen/xenbus/xenbusvar.h> 71 72 #include "xenbus_if.h" 73 74 /* Features supported by all backends. TSO and LRO can be negotiated */ 75 #define XN_CSUM_FEATURES (CSUM_TCP | CSUM_UDP) 76 77 #define NET_TX_RING_SIZE __RING_SIZE((netif_tx_sring_t *)0, PAGE_SIZE) 78 #define NET_RX_RING_SIZE __RING_SIZE((netif_rx_sring_t *)0, PAGE_SIZE) 79 80 #define NET_RX_SLOTS_MIN (XEN_NETIF_NR_SLOTS_MIN + 1) 81 82 /* 83 * Should the driver do LRO on the RX end 84 * this can be toggled on the fly, but the 85 * interface must be reset (down/up) for it 86 * to take effect. 87 */ 88 static int xn_enable_lro = 1; 89 TUNABLE_INT("hw.xn.enable_lro", &xn_enable_lro); 90 91 /* 92 * Number of pairs of queues. 93 */ 94 static unsigned long xn_num_queues = 4; 95 TUNABLE_ULONG("hw.xn.num_queues", &xn_num_queues); 96 97 /** 98 * \brief The maximum allowed data fragments in a single transmit 99 * request. 100 * 101 * This limit is imposed by the backend driver. We assume here that 102 * we are dealing with a Linux driver domain and have set our limit 103 * to mirror the Linux MAX_SKB_FRAGS constant. 
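 *
 * With the usual 4 KiB PAGE_SIZE this works out to 65536 / 4096 + 2 = 18
 * fragment slots per transmit request (the exact value is an assumption
 * that depends on the platform page size).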
104 */ 105 #define MAX_TX_REQ_FRAGS (65536 / PAGE_SIZE + 2) 106 107 #define RX_COPY_THRESHOLD 256 108 109 #define net_ratelimit() 0 110 111 struct netfront_rxq; 112 struct netfront_txq; 113 struct netfront_info; 114 struct netfront_rx_info; 115 116 static void xn_txeof(struct netfront_txq *); 117 static void xn_rxeof(struct netfront_rxq *); 118 static void xn_alloc_rx_buffers(struct netfront_rxq *); 119 static void xn_alloc_rx_buffers_callout(void *arg); 120 121 static void xn_release_rx_bufs(struct netfront_rxq *); 122 static void xn_release_tx_bufs(struct netfront_txq *); 123 124 static void xn_rxq_intr(struct netfront_rxq *); 125 static void xn_txq_intr(struct netfront_txq *); 126 static void xn_intr(void *); 127 static inline int xn_count_frags(struct mbuf *m); 128 static int xn_assemble_tx_request(struct netfront_txq *, struct mbuf *); 129 static int xn_ioctl(struct ifnet *, u_long, caddr_t); 130 static void xn_ifinit_locked(struct netfront_info *); 131 static void xn_ifinit(void *); 132 static void xn_stop(struct netfront_info *); 133 static void xn_query_features(struct netfront_info *np); 134 static int xn_configure_features(struct netfront_info *np); 135 static void netif_free(struct netfront_info *info); 136 static int netfront_detach(device_t dev); 137 138 static int xn_txq_mq_start_locked(struct netfront_txq *, struct mbuf *); 139 static int xn_txq_mq_start(struct ifnet *, struct mbuf *); 140 141 static int talk_to_backend(device_t dev, struct netfront_info *info); 142 static int create_netdev(device_t dev); 143 static void netif_disconnect_backend(struct netfront_info *info); 144 static int setup_device(device_t dev, struct netfront_info *info, 145 unsigned long); 146 static int xn_ifmedia_upd(struct ifnet *ifp); 147 static void xn_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr); 148 149 static int xn_connect(struct netfront_info *); 150 static void xn_kick_rings(struct netfront_info *); 151 152 static int xn_get_responses(struct netfront_rxq *, 153 struct netfront_rx_info *, RING_IDX, RING_IDX *, 154 struct mbuf **); 155 156 #define virt_to_mfn(x) (vtophys(x) >> PAGE_SHIFT) 157 158 #define INVALID_P2M_ENTRY (~0UL) 159 160 struct xn_rx_stats 161 { 162 u_long rx_packets; /* total packets received */ 163 u_long rx_bytes; /* total bytes received */ 164 u_long rx_errors; /* bad packets received */ 165 }; 166 167 struct xn_tx_stats 168 { 169 u_long tx_packets; /* total packets transmitted */ 170 u_long tx_bytes; /* total bytes transmitted */ 171 u_long tx_errors; /* packet transmit problems */ 172 }; 173 174 #define XN_QUEUE_NAME_LEN 8 /* xn{t,r}x_%u, allow for two digits */ 175 struct netfront_rxq { 176 struct netfront_info *info; 177 u_int id; 178 char name[XN_QUEUE_NAME_LEN]; 179 struct mtx lock; 180 181 int ring_ref; 182 netif_rx_front_ring_t ring; 183 xen_intr_handle_t xen_intr_handle; 184 185 grant_ref_t gref_head; 186 grant_ref_t grant_ref[NET_TX_RING_SIZE + 1]; 187 188 struct mbuf *mbufs[NET_RX_RING_SIZE + 1]; 189 190 struct lro_ctrl lro; 191 192 struct callout rx_refill; 193 194 struct xn_rx_stats stats; 195 }; 196 197 struct netfront_txq { 198 struct netfront_info *info; 199 u_int id; 200 char name[XN_QUEUE_NAME_LEN]; 201 struct mtx lock; 202 203 int ring_ref; 204 netif_tx_front_ring_t ring; 205 xen_intr_handle_t xen_intr_handle; 206 207 grant_ref_t gref_head; 208 grant_ref_t grant_ref[NET_TX_RING_SIZE + 1]; 209 210 struct mbuf *mbufs[NET_TX_RING_SIZE + 1]; 211 int mbufs_cnt; 212 struct buf_ring *br; 213 214 struct taskqueue *tq; 215 struct task defrtask; 216 217 
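	/*
	 * Set in xn_txq_mq_start_locked() when the shared TX ring fills up;
	 * cleared again in xn_txeof() once enough responses have been
	 * consumed, at which point xn_txq_start() resumes draining the
	 * buf_ring.
	 */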
bool full; 218 219 struct xn_tx_stats stats; 220 }; 221 222 struct netfront_info { 223 struct ifnet *xn_ifp; 224 225 struct mtx sc_lock; 226 227 u_int num_queues; 228 struct netfront_rxq *rxq; 229 struct netfront_txq *txq; 230 231 u_int carrier; 232 u_int maxfrags; 233 234 device_t xbdev; 235 uint8_t mac[ETHER_ADDR_LEN]; 236 237 int xn_if_flags; 238 239 struct ifmedia sc_media; 240 241 bool xn_reset; 242 }; 243 244 struct netfront_rx_info { 245 struct netif_rx_response rx; 246 struct netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1]; 247 }; 248 249 #define XN_RX_LOCK(_q) mtx_lock(&(_q)->lock) 250 #define XN_RX_UNLOCK(_q) mtx_unlock(&(_q)->lock) 251 252 #define XN_TX_LOCK(_q) mtx_lock(&(_q)->lock) 253 #define XN_TX_TRYLOCK(_q) mtx_trylock(&(_q)->lock) 254 #define XN_TX_UNLOCK(_q) mtx_unlock(&(_q)->lock) 255 256 #define XN_LOCK(_sc) mtx_lock(&(_sc)->sc_lock); 257 #define XN_UNLOCK(_sc) mtx_unlock(&(_sc)->sc_lock); 258 259 #define XN_LOCK_ASSERT(_sc) mtx_assert(&(_sc)->sc_lock, MA_OWNED); 260 #define XN_RX_LOCK_ASSERT(_q) mtx_assert(&(_q)->lock, MA_OWNED); 261 #define XN_TX_LOCK_ASSERT(_q) mtx_assert(&(_q)->lock, MA_OWNED); 262 263 #define netfront_carrier_on(netif) ((netif)->carrier = 1) 264 #define netfront_carrier_off(netif) ((netif)->carrier = 0) 265 #define netfront_carrier_ok(netif) ((netif)->carrier) 266 267 /* Access macros for acquiring freeing slots in xn_free_{tx,rx}_idxs[]. */ 268 269 static inline void 270 add_id_to_freelist(struct mbuf **list, uintptr_t id) 271 { 272 273 KASSERT(id != 0, 274 ("%s: the head item (0) must always be free.", __func__)); 275 list[id] = list[0]; 276 list[0] = (struct mbuf *)id; 277 } 278 279 static inline unsigned short 280 get_id_from_freelist(struct mbuf **list) 281 { 282 uintptr_t id; 283 284 id = (uintptr_t)list[0]; 285 KASSERT(id != 0, 286 ("%s: the head item (0) must always remain free.", __func__)); 287 list[0] = list[id]; 288 return (id); 289 } 290 291 static inline int 292 xn_rxidx(RING_IDX idx) 293 { 294 295 return idx & (NET_RX_RING_SIZE - 1); 296 } 297 298 static inline struct mbuf * 299 xn_get_rx_mbuf(struct netfront_rxq *rxq, RING_IDX ri) 300 { 301 int i; 302 struct mbuf *m; 303 304 i = xn_rxidx(ri); 305 m = rxq->mbufs[i]; 306 rxq->mbufs[i] = NULL; 307 return (m); 308 } 309 310 static inline grant_ref_t 311 xn_get_rx_ref(struct netfront_rxq *rxq, RING_IDX ri) 312 { 313 int i = xn_rxidx(ri); 314 grant_ref_t ref = rxq->grant_ref[i]; 315 316 KASSERT(ref != GRANT_REF_INVALID, ("Invalid grant reference!\n")); 317 rxq->grant_ref[i] = GRANT_REF_INVALID; 318 return (ref); 319 } 320 321 #define IPRINTK(fmt, args...) \ 322 printf("[XEN] " fmt, ##args) 323 #ifdef INVARIANTS 324 #define WPRINTK(fmt, args...) \ 325 printf("[XEN] " fmt, ##args) 326 #else 327 #define WPRINTK(fmt, args...) 328 #endif 329 #ifdef DEBUG 330 #define DPRINTK(fmt, args...) \ 331 printf("[XEN] %s: " fmt, __func__, ##args) 332 #else 333 #define DPRINTK(fmt, args...) 334 #endif 335 336 /** 337 * Read the 'mac' node at the given device's node in the store, and parse that 338 * as colon-separated octets, placing result the given mac array. mac must be 339 * a preallocated array of length ETH_ALEN (as declared in linux/if_ether.h). 340 * Return 0 on success, or errno on error. 
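 *
 * The XenStore value is expected to be a plain string of the form
 * "00:16:3e:xx:xx:xx" (00:16:3e being the Xen project's OUI), normally
 * written into the store by the toolstack when the vif is created.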
341 */ 342 static int 343 xen_net_read_mac(device_t dev, uint8_t mac[]) 344 { 345 int error, i; 346 char *s, *e, *macstr; 347 const char *path; 348 349 path = xenbus_get_node(dev); 350 error = xs_read(XST_NIL, path, "mac", NULL, (void **) &macstr); 351 if (error == ENOENT) { 352 /* 353 * Deal with missing mac XenStore nodes on devices with 354 * HVM emulation (the 'ioemu' configuration attribute) 355 * enabled. 356 * 357 * The HVM emulator may execute in a stub device model 358 * domain which lacks the permission, only given to Dom0, 359 * to update the guest's XenStore tree. For this reason, 360 * the HVM emulator doesn't even attempt to write the 361 * front-side mac node, even when operating in Dom0. 362 * However, there should always be a mac listed in the 363 * backend tree. Fallback to this version if our query 364 * of the front side XenStore location doesn't find 365 * anything. 366 */ 367 path = xenbus_get_otherend_path(dev); 368 error = xs_read(XST_NIL, path, "mac", NULL, (void **) &macstr); 369 } 370 if (error != 0) { 371 xenbus_dev_fatal(dev, error, "parsing %s/mac", path); 372 return (error); 373 } 374 375 s = macstr; 376 for (i = 0; i < ETHER_ADDR_LEN; i++) { 377 mac[i] = strtoul(s, &e, 16); 378 if (s == e || (e[0] != ':' && e[0] != 0)) { 379 free(macstr, M_XENBUS); 380 return (ENOENT); 381 } 382 s = &e[1]; 383 } 384 free(macstr, M_XENBUS); 385 return (0); 386 } 387 388 /** 389 * Entry point to this code when a new device is created. Allocate the basic 390 * structures and the ring buffers for communication with the backend, and 391 * inform the backend of the appropriate details for those. Switch to 392 * Connected state. 393 */ 394 static int 395 netfront_probe(device_t dev) 396 { 397 398 if (xen_hvm_domain() && xen_disable_pv_nics != 0) 399 return (ENXIO); 400 401 if (!strcmp(xenbus_get_type(dev), "vif")) { 402 device_set_desc(dev, "Virtual Network Interface"); 403 return (0); 404 } 405 406 return (ENXIO); 407 } 408 409 static int 410 netfront_attach(device_t dev) 411 { 412 int err; 413 414 err = create_netdev(dev); 415 if (err != 0) { 416 xenbus_dev_fatal(dev, err, "creating netdev"); 417 return (err); 418 } 419 420 SYSCTL_ADD_INT(device_get_sysctl_ctx(dev), 421 SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), 422 OID_AUTO, "enable_lro", CTLFLAG_RW, 423 &xn_enable_lro, 0, "Large Receive Offload"); 424 425 SYSCTL_ADD_ULONG(device_get_sysctl_ctx(dev), 426 SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), 427 OID_AUTO, "num_queues", CTLFLAG_RD, 428 &xn_num_queues, "Number of pairs of queues"); 429 430 return (0); 431 } 432 433 static int 434 netfront_suspend(device_t dev) 435 { 436 struct netfront_info *np = device_get_softc(dev); 437 u_int i; 438 439 for (i = 0; i < np->num_queues; i++) { 440 XN_RX_LOCK(&np->rxq[i]); 441 XN_TX_LOCK(&np->txq[i]); 442 } 443 netfront_carrier_off(np); 444 for (i = 0; i < np->num_queues; i++) { 445 XN_RX_UNLOCK(&np->rxq[i]); 446 XN_TX_UNLOCK(&np->txq[i]); 447 } 448 return (0); 449 } 450 451 /** 452 * We are reconnecting to the backend, due to a suspend/resume, or a backend 453 * driver restart. We tear down our netif structure and recreate it, but 454 * leave the device-layer structures intact so that this is transparent to the 455 * rest of the kernel. 
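 *
 * Only the disconnect happens here; the reconnection is driven later from
 * netfront_backend_changed(), once the backend is back in InitWait and
 * xn_connect() can renegotiate the shared rings.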
456 */ 457 static int 458 netfront_resume(device_t dev) 459 { 460 struct netfront_info *info = device_get_softc(dev); 461 462 netif_disconnect_backend(info); 463 return (0); 464 } 465 466 static int 467 write_queue_xenstore_keys(device_t dev, 468 struct netfront_rxq *rxq, 469 struct netfront_txq *txq, 470 struct xs_transaction *xst, bool hierarchy) 471 { 472 int err; 473 const char *message; 474 const char *node = xenbus_get_node(dev); 475 char *path; 476 size_t path_size; 477 478 KASSERT(rxq->id == txq->id, ("Mismatch between RX and TX queue ids")); 479 /* Split event channel support is not yet there. */ 480 KASSERT(rxq->xen_intr_handle == txq->xen_intr_handle, 481 ("Split event channels are not supported")); 482 483 if (hierarchy) { 484 path_size = strlen(node) + 10; 485 path = malloc(path_size, M_DEVBUF, M_WAITOK|M_ZERO); 486 snprintf(path, path_size, "%s/queue-%u", node, rxq->id); 487 } else { 488 path_size = strlen(node) + 1; 489 path = malloc(path_size, M_DEVBUF, M_WAITOK|M_ZERO); 490 snprintf(path, path_size, "%s", node); 491 } 492 493 err = xs_printf(*xst, path, "tx-ring-ref","%u", txq->ring_ref); 494 if (err != 0) { 495 message = "writing tx ring-ref"; 496 goto error; 497 } 498 err = xs_printf(*xst, path, "rx-ring-ref","%u", rxq->ring_ref); 499 if (err != 0) { 500 message = "writing rx ring-ref"; 501 goto error; 502 } 503 err = xs_printf(*xst, path, "event-channel", "%u", 504 xen_intr_port(rxq->xen_intr_handle)); 505 if (err != 0) { 506 message = "writing event-channel"; 507 goto error; 508 } 509 510 free(path, M_DEVBUF); 511 512 return (0); 513 514 error: 515 free(path, M_DEVBUF); 516 xenbus_dev_fatal(dev, err, "%s", message); 517 518 return (err); 519 } 520 521 /* Common code used when first setting up, and when resuming. */ 522 static int 523 talk_to_backend(device_t dev, struct netfront_info *info) 524 { 525 const char *message; 526 struct xs_transaction xst; 527 const char *node = xenbus_get_node(dev); 528 int err; 529 unsigned long num_queues, max_queues = 0; 530 unsigned int i; 531 532 err = xen_net_read_mac(dev, info->mac); 533 if (err != 0) { 534 xenbus_dev_fatal(dev, err, "parsing %s/mac", node); 535 goto out; 536 } 537 538 err = xs_scanf(XST_NIL, xenbus_get_otherend_path(info->xbdev), 539 "multi-queue-max-queues", NULL, "%lu", &max_queues); 540 if (err != 0) 541 max_queues = 1; 542 num_queues = xn_num_queues; 543 if (num_queues > max_queues) 544 num_queues = max_queues; 545 546 err = setup_device(dev, info, num_queues); 547 if (err != 0) 548 goto out; 549 550 again: 551 err = xs_transaction_start(&xst); 552 if (err != 0) { 553 xenbus_dev_fatal(dev, err, "starting transaction"); 554 goto free; 555 } 556 557 if (info->num_queues == 1) { 558 err = write_queue_xenstore_keys(dev, &info->rxq[0], 559 &info->txq[0], &xst, false); 560 if (err != 0) 561 goto abort_transaction_no_def_error; 562 } else { 563 err = xs_printf(xst, node, "multi-queue-num-queues", 564 "%u", info->num_queues); 565 if (err != 0) { 566 message = "writing multi-queue-num-queues"; 567 goto abort_transaction; 568 } 569 570 for (i = 0; i < info->num_queues; i++) { 571 err = write_queue_xenstore_keys(dev, &info->rxq[i], 572 &info->txq[i], &xst, true); 573 if (err != 0) 574 goto abort_transaction_no_def_error; 575 } 576 } 577 578 err = xs_printf(xst, node, "request-rx-copy", "%u", 1); 579 if (err != 0) { 580 message = "writing request-rx-copy"; 581 goto abort_transaction; 582 } 583 err = xs_printf(xst, node, "feature-rx-notify", "%d", 1); 584 if (err != 0) { 585 message = "writing feature-rx-notify"; 586 goto 
abort_transaction; 587 } 588 err = xs_printf(xst, node, "feature-sg", "%d", 1); 589 if (err != 0) { 590 message = "writing feature-sg"; 591 goto abort_transaction; 592 } 593 if ((info->xn_ifp->if_capenable & IFCAP_LRO) != 0) { 594 err = xs_printf(xst, node, "feature-gso-tcpv4", "%d", 1); 595 if (err != 0) { 596 message = "writing feature-gso-tcpv4"; 597 goto abort_transaction; 598 } 599 } 600 if ((info->xn_ifp->if_capenable & IFCAP_RXCSUM) == 0) { 601 err = xs_printf(xst, node, "feature-no-csum-offload", "%d", 1); 602 if (err != 0) { 603 message = "writing feature-no-csum-offload"; 604 goto abort_transaction; 605 } 606 } 607 608 err = xs_transaction_end(xst, 0); 609 if (err != 0) { 610 if (err == EAGAIN) 611 goto again; 612 xenbus_dev_fatal(dev, err, "completing transaction"); 613 goto free; 614 } 615 616 return 0; 617 618 abort_transaction: 619 xenbus_dev_fatal(dev, err, "%s", message); 620 abort_transaction_no_def_error: 621 xs_transaction_end(xst, 1); 622 free: 623 netif_free(info); 624 out: 625 return (err); 626 } 627 628 static void 629 xn_rxq_intr(struct netfront_rxq *rxq) 630 { 631 632 XN_RX_LOCK(rxq); 633 xn_rxeof(rxq); 634 XN_RX_UNLOCK(rxq); 635 } 636 637 static void 638 xn_txq_start(struct netfront_txq *txq) 639 { 640 struct netfront_info *np = txq->info; 641 struct ifnet *ifp = np->xn_ifp; 642 643 XN_TX_LOCK_ASSERT(txq); 644 if (!drbr_empty(ifp, txq->br)) 645 xn_txq_mq_start_locked(txq, NULL); 646 } 647 648 static void 649 xn_txq_intr(struct netfront_txq *txq) 650 { 651 652 XN_TX_LOCK(txq); 653 if (RING_HAS_UNCONSUMED_RESPONSES(&txq->ring)) 654 xn_txeof(txq); 655 xn_txq_start(txq); 656 XN_TX_UNLOCK(txq); 657 } 658 659 static void 660 xn_txq_tq_deferred(void *xtxq, int pending) 661 { 662 struct netfront_txq *txq = xtxq; 663 664 XN_TX_LOCK(txq); 665 xn_txq_start(txq); 666 XN_TX_UNLOCK(txq); 667 } 668 669 static void 670 disconnect_rxq(struct netfront_rxq *rxq) 671 { 672 673 xn_release_rx_bufs(rxq); 674 gnttab_free_grant_references(rxq->gref_head); 675 gnttab_end_foreign_access(rxq->ring_ref, NULL); 676 /* 677 * No split event channel support at the moment, handle will 678 * be unbound in tx. So no need to call xen_intr_unbind here, 679 * but we do want to reset the handler to 0. 
680 */ 681 rxq->xen_intr_handle = 0; 682 } 683 684 static void 685 destroy_rxq(struct netfront_rxq *rxq) 686 { 687 688 callout_drain(&rxq->rx_refill); 689 free(rxq->ring.sring, M_DEVBUF); 690 } 691 692 static void 693 destroy_rxqs(struct netfront_info *np) 694 { 695 int i; 696 697 for (i = 0; i < np->num_queues; i++) 698 destroy_rxq(&np->rxq[i]); 699 700 free(np->rxq, M_DEVBUF); 701 np->rxq = NULL; 702 } 703 704 static int 705 setup_rxqs(device_t dev, struct netfront_info *info, 706 unsigned long num_queues) 707 { 708 int q, i; 709 int error; 710 netif_rx_sring_t *rxs; 711 struct netfront_rxq *rxq; 712 713 info->rxq = malloc(sizeof(struct netfront_rxq) * num_queues, 714 M_DEVBUF, M_WAITOK|M_ZERO); 715 716 for (q = 0; q < num_queues; q++) { 717 rxq = &info->rxq[q]; 718 719 rxq->id = q; 720 rxq->info = info; 721 rxq->ring_ref = GRANT_REF_INVALID; 722 rxq->ring.sring = NULL; 723 snprintf(rxq->name, XN_QUEUE_NAME_LEN, "xnrx_%u", q); 724 mtx_init(&rxq->lock, rxq->name, "netfront receive lock", 725 MTX_DEF); 726 727 for (i = 0; i <= NET_RX_RING_SIZE; i++) { 728 rxq->mbufs[i] = NULL; 729 rxq->grant_ref[i] = GRANT_REF_INVALID; 730 } 731 732 /* Start resources allocation */ 733 734 if (gnttab_alloc_grant_references(NET_RX_RING_SIZE, 735 &rxq->gref_head) != 0) { 736 device_printf(dev, "allocating rx gref"); 737 error = ENOMEM; 738 goto fail; 739 } 740 741 rxs = (netif_rx_sring_t *)malloc(PAGE_SIZE, M_DEVBUF, 742 M_WAITOK|M_ZERO); 743 SHARED_RING_INIT(rxs); 744 FRONT_RING_INIT(&rxq->ring, rxs, PAGE_SIZE); 745 746 error = xenbus_grant_ring(dev, virt_to_mfn(rxs), 747 &rxq->ring_ref); 748 if (error != 0) { 749 device_printf(dev, "granting rx ring page"); 750 goto fail_grant_ring; 751 } 752 753 callout_init(&rxq->rx_refill, 1); 754 } 755 756 return (0); 757 758 fail_grant_ring: 759 gnttab_free_grant_references(rxq->gref_head); 760 free(rxq->ring.sring, M_DEVBUF); 761 fail: 762 for (; q >= 0; q--) { 763 disconnect_rxq(&info->rxq[q]); 764 destroy_rxq(&info->rxq[q]); 765 } 766 767 free(info->rxq, M_DEVBUF); 768 return (error); 769 } 770 771 static void 772 disconnect_txq(struct netfront_txq *txq) 773 { 774 775 xn_release_tx_bufs(txq); 776 gnttab_free_grant_references(txq->gref_head); 777 gnttab_end_foreign_access(txq->ring_ref, NULL); 778 xen_intr_unbind(&txq->xen_intr_handle); 779 } 780 781 static void 782 destroy_txq(struct netfront_txq *txq) 783 { 784 785 free(txq->ring.sring, M_DEVBUF); 786 buf_ring_free(txq->br, M_DEVBUF); 787 taskqueue_drain_all(txq->tq); 788 taskqueue_free(txq->tq); 789 } 790 791 static void 792 destroy_txqs(struct netfront_info *np) 793 { 794 int i; 795 796 for (i = 0; i < np->num_queues; i++) 797 destroy_txq(&np->txq[i]); 798 799 free(np->txq, M_DEVBUF); 800 np->txq = NULL; 801 } 802 803 static int 804 setup_txqs(device_t dev, struct netfront_info *info, 805 unsigned long num_queues) 806 { 807 int q, i; 808 int error; 809 netif_tx_sring_t *txs; 810 struct netfront_txq *txq; 811 812 info->txq = malloc(sizeof(struct netfront_txq) * num_queues, 813 M_DEVBUF, M_WAITOK|M_ZERO); 814 815 for (q = 0; q < num_queues; q++) { 816 txq = &info->txq[q]; 817 818 txq->id = q; 819 txq->info = info; 820 821 txq->ring_ref = GRANT_REF_INVALID; 822 txq->ring.sring = NULL; 823 824 snprintf(txq->name, XN_QUEUE_NAME_LEN, "xntx_%u", q); 825 826 mtx_init(&txq->lock, txq->name, "netfront transmit lock", 827 MTX_DEF); 828 829 for (i = 0; i <= NET_TX_RING_SIZE; i++) { 830 txq->mbufs[i] = (void *) ((u_long) i+1); 831 txq->grant_ref[i] = GRANT_REF_INVALID; 832 } 833 txq->mbufs[NET_TX_RING_SIZE] = (void *)0; 834 
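		/*
		 * The loop above threads mbufs[] into a free-list of ring
		 * ids: slot 0 is the list head, each free slot stores the
		 * index of the next free slot, and a stored value of 0
		 * terminates the list (see add_id_to_freelist() and
		 * get_id_from_freelist()).  Values no larger than
		 * NET_TX_RING_SIZE are therefore list links, not mbuf
		 * pointers.
		 */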
835 /* Start resources allocation. */ 836 837 if (gnttab_alloc_grant_references(NET_TX_RING_SIZE, 838 &txq->gref_head) != 0) { 839 device_printf(dev, "failed to allocate tx grant refs\n"); 840 error = ENOMEM; 841 goto fail; 842 } 843 844 txs = (netif_tx_sring_t *)malloc(PAGE_SIZE, M_DEVBUF, 845 M_WAITOK|M_ZERO); 846 SHARED_RING_INIT(txs); 847 FRONT_RING_INIT(&txq->ring, txs, PAGE_SIZE); 848 849 error = xenbus_grant_ring(dev, virt_to_mfn(txs), 850 &txq->ring_ref); 851 if (error != 0) { 852 device_printf(dev, "failed to grant tx ring\n"); 853 goto fail_grant_ring; 854 } 855 856 txq->br = buf_ring_alloc(NET_TX_RING_SIZE, M_DEVBUF, 857 M_WAITOK, &txq->lock); 858 TASK_INIT(&txq->defrtask, 0, xn_txq_tq_deferred, txq); 859 860 txq->tq = taskqueue_create(txq->name, M_WAITOK, 861 taskqueue_thread_enqueue, &txq->tq); 862 863 error = taskqueue_start_threads(&txq->tq, 1, PI_NET, 864 "%s txq %d", device_get_nameunit(dev), txq->id); 865 if (error != 0) { 866 device_printf(dev, "failed to start tx taskq %d\n", 867 txq->id); 868 goto fail_start_thread; 869 } 870 871 error = xen_intr_alloc_and_bind_local_port(dev, 872 xenbus_get_otherend_id(dev), /* filter */ NULL, xn_intr, 873 &info->txq[q], INTR_TYPE_NET | INTR_MPSAFE | INTR_ENTROPY, 874 &txq->xen_intr_handle); 875 876 if (error != 0) { 877 device_printf(dev, "xen_intr_alloc_and_bind_local_port failed\n"); 878 goto fail_bind_port; 879 } 880 } 881 882 return (0); 883 884 fail_bind_port: 885 taskqueue_drain_all(txq->tq); 886 fail_start_thread: 887 buf_ring_free(txq->br, M_DEVBUF); 888 taskqueue_free(txq->tq); 889 gnttab_end_foreign_access(txq->ring_ref, NULL); 890 fail_grant_ring: 891 gnttab_free_grant_references(txq->gref_head); 892 free(txq->ring.sring, M_DEVBUF); 893 fail: 894 for (; q >= 0; q--) { 895 disconnect_txq(&info->txq[q]); 896 destroy_txq(&info->txq[q]); 897 } 898 899 free(info->txq, M_DEVBUF); 900 return (error); 901 } 902 903 static int 904 setup_device(device_t dev, struct netfront_info *info, 905 unsigned long num_queues) 906 { 907 int error; 908 int q; 909 910 if (info->txq) 911 destroy_txqs(info); 912 913 if (info->rxq) 914 destroy_rxqs(info); 915 916 info->num_queues = 0; 917 918 error = setup_rxqs(dev, info, num_queues); 919 if (error != 0) 920 goto out; 921 error = setup_txqs(dev, info, num_queues); 922 if (error != 0) 923 goto out; 924 925 info->num_queues = num_queues; 926 927 /* No split event channel at the moment. */ 928 for (q = 0; q < num_queues; q++) 929 info->rxq[q].xen_intr_handle = info->txq[q].xen_intr_handle; 930 931 return (0); 932 933 out: 934 KASSERT(error != 0, ("Error path taken without providing an error code")); 935 return (error); 936 } 937 938 #ifdef INET 939 /** 940 * If this interface has an ipv4 address, send an arp for it. This 941 * helps to get the network going again after migrating hosts. 942 */ 943 static void 944 netfront_send_fake_arp(device_t dev, struct netfront_info *info) 945 { 946 struct ifnet *ifp; 947 struct ifaddr *ifa; 948 949 ifp = info->xn_ifp; 950 TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { 951 if (ifa->ifa_addr->sa_family == AF_INET) { 952 arp_ifinit(ifp, ifa); 953 } 954 } 955 } 956 #endif 957 958 /** 959 * Callback received when the backend's state changes. 
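 *
 * The interesting transitions are InitWait (backend is ready, so connect
 * and switch to Connected), Closing/Closed (tear down, and restart the
 * handshake when a local reset is pending) and Connected (refresh ARP
 * state, e.g. after a migration).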
960 */ 961 static void 962 netfront_backend_changed(device_t dev, XenbusState newstate) 963 { 964 struct netfront_info *sc = device_get_softc(dev); 965 966 DPRINTK("newstate=%d\n", newstate); 967 968 switch (newstate) { 969 case XenbusStateInitialising: 970 case XenbusStateInitialised: 971 case XenbusStateUnknown: 972 case XenbusStateReconfigured: 973 case XenbusStateReconfiguring: 974 break; 975 case XenbusStateInitWait: 976 if (xenbus_get_state(dev) != XenbusStateInitialising) 977 break; 978 if (xn_connect(sc) != 0) 979 break; 980 /* Switch to connected state before kicking the rings. */ 981 xenbus_set_state(sc->xbdev, XenbusStateConnected); 982 xn_kick_rings(sc); 983 break; 984 case XenbusStateClosing: 985 xenbus_set_state(dev, XenbusStateClosed); 986 break; 987 case XenbusStateClosed: 988 if (sc->xn_reset) { 989 netif_disconnect_backend(sc); 990 xenbus_set_state(dev, XenbusStateInitialising); 991 sc->xn_reset = false; 992 } 993 break; 994 case XenbusStateConnected: 995 #ifdef INET 996 netfront_send_fake_arp(dev, sc); 997 #endif 998 break; 999 } 1000 } 1001 1002 /** 1003 * \brief Verify that there is sufficient space in the Tx ring 1004 * buffer for a maximally sized request to be enqueued. 1005 * 1006 * A transmit request requires a transmit descriptor for each packet 1007 * fragment, plus up to 2 entries for "options" (e.g. TSO). 1008 */ 1009 static inline int 1010 xn_tx_slot_available(struct netfront_txq *txq) 1011 { 1012 1013 return (RING_FREE_REQUESTS(&txq->ring) > (MAX_TX_REQ_FRAGS + 2)); 1014 } 1015 1016 static void 1017 xn_release_tx_bufs(struct netfront_txq *txq) 1018 { 1019 int i; 1020 1021 for (i = 1; i <= NET_TX_RING_SIZE; i++) { 1022 struct mbuf *m; 1023 1024 m = txq->mbufs[i]; 1025 1026 /* 1027 * We assume that no kernel addresses are 1028 * less than NET_TX_RING_SIZE. Any entry 1029 * in the table that is below this number 1030 * must be an index from free-list tracking. 
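		 * Slots that do hold a live mbuf pointer are unmapped from
		 * the backend, returned to the free list and freed below.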
		 */
		if (((uintptr_t)m) <= NET_TX_RING_SIZE)
			continue;
		gnttab_end_foreign_access_ref(txq->grant_ref[i]);
		gnttab_release_grant_reference(&txq->gref_head,
		    txq->grant_ref[i]);
		txq->grant_ref[i] = GRANT_REF_INVALID;
		add_id_to_freelist(txq->mbufs, i);
		txq->mbufs_cnt--;
		if (txq->mbufs_cnt < 0) {
			panic("%s: tx_chain_cnt must be >= 0", __func__);
		}
		m_free(m);
	}
}

static struct mbuf *
xn_alloc_one_rx_buffer(struct netfront_rxq *rxq)
{
	struct mbuf *m;

	m = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, MJUMPAGESIZE);
	if (m == NULL)
		return NULL;
	m->m_len = m->m_pkthdr.len = MJUMPAGESIZE;

	return (m);
}

static void
xn_alloc_rx_buffers(struct netfront_rxq *rxq)
{
	RING_IDX req_prod;
	int notify;

	XN_RX_LOCK_ASSERT(rxq);

	if (__predict_false(rxq->info->carrier == 0))
		return;

	for (req_prod = rxq->ring.req_prod_pvt;
	     req_prod - rxq->ring.rsp_cons < NET_RX_RING_SIZE;
	     req_prod++) {
		struct mbuf *m;
		unsigned short id;
		grant_ref_t ref;
		struct netif_rx_request *req;
		unsigned long pfn;

		m = xn_alloc_one_rx_buffer(rxq);
		if (m == NULL)
			break;

		id = xn_rxidx(req_prod);

		KASSERT(rxq->mbufs[id] == NULL, ("non-NULL xn_rx_chain"));
		rxq->mbufs[id] = m;

		ref = gnttab_claim_grant_reference(&rxq->gref_head);
		KASSERT(ref != GNTTAB_LIST_END,
		    ("reserved grant references exhausted"));
		rxq->grant_ref[id] = ref;

		pfn = atop(vtophys(mtod(m, vm_offset_t)));
		req = RING_GET_REQUEST(&rxq->ring, req_prod);

		gnttab_grant_foreign_access_ref(ref,
		    xenbus_get_otherend_id(rxq->info->xbdev), pfn, 0);
		req->id = id;
		req->gref = ref;
	}

	rxq->ring.req_prod_pvt = req_prod;

	/* Not enough requests? Try again later.
*/ 1106 if (req_prod - rxq->ring.rsp_cons < NET_RX_SLOTS_MIN) { 1107 callout_reset_curcpu(&rxq->rx_refill, hz/10, 1108 xn_alloc_rx_buffers_callout, rxq); 1109 return; 1110 } 1111 1112 wmb(); /* barrier so backend seens requests */ 1113 1114 RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&rxq->ring, notify); 1115 if (notify) 1116 xen_intr_signal(rxq->xen_intr_handle); 1117 } 1118 1119 static void xn_alloc_rx_buffers_callout(void *arg) 1120 { 1121 struct netfront_rxq *rxq; 1122 1123 rxq = (struct netfront_rxq *)arg; 1124 XN_RX_LOCK(rxq); 1125 xn_alloc_rx_buffers(rxq); 1126 XN_RX_UNLOCK(rxq); 1127 } 1128 1129 static void 1130 xn_release_rx_bufs(struct netfront_rxq *rxq) 1131 { 1132 int i, ref; 1133 struct mbuf *m; 1134 1135 for (i = 0; i < NET_RX_RING_SIZE; i++) { 1136 m = rxq->mbufs[i]; 1137 1138 if (m == NULL) 1139 continue; 1140 1141 ref = rxq->grant_ref[i]; 1142 if (ref == GRANT_REF_INVALID) 1143 continue; 1144 1145 gnttab_end_foreign_access_ref(ref); 1146 gnttab_release_grant_reference(&rxq->gref_head, ref); 1147 rxq->mbufs[i] = NULL; 1148 rxq->grant_ref[i] = GRANT_REF_INVALID; 1149 m_freem(m); 1150 } 1151 } 1152 1153 static void 1154 xn_rxeof(struct netfront_rxq *rxq) 1155 { 1156 struct ifnet *ifp; 1157 struct netfront_info *np = rxq->info; 1158 #if (defined(INET) || defined(INET6)) 1159 struct lro_ctrl *lro = &rxq->lro; 1160 #endif 1161 struct netfront_rx_info rinfo; 1162 struct netif_rx_response *rx = &rinfo.rx; 1163 struct netif_extra_info *extras = rinfo.extras; 1164 RING_IDX i, rp; 1165 struct mbuf *m; 1166 struct mbufq mbufq_rxq, mbufq_errq; 1167 int err, work_to_do; 1168 1169 do { 1170 XN_RX_LOCK_ASSERT(rxq); 1171 if (!netfront_carrier_ok(np)) 1172 return; 1173 1174 /* XXX: there should be some sane limit. */ 1175 mbufq_init(&mbufq_errq, INT_MAX); 1176 mbufq_init(&mbufq_rxq, INT_MAX); 1177 1178 ifp = np->xn_ifp; 1179 1180 rp = rxq->ring.sring->rsp_prod; 1181 rmb(); /* Ensure we see queued responses up to 'rp'. */ 1182 1183 i = rxq->ring.rsp_cons; 1184 while ((i != rp)) { 1185 memcpy(rx, RING_GET_RESPONSE(&rxq->ring, i), sizeof(*rx)); 1186 memset(extras, 0, sizeof(rinfo.extras)); 1187 1188 m = NULL; 1189 err = xn_get_responses(rxq, &rinfo, rp, &i, &m); 1190 1191 if (__predict_false(err)) { 1192 if (m) 1193 (void )mbufq_enqueue(&mbufq_errq, m); 1194 rxq->stats.rx_errors++; 1195 continue; 1196 } 1197 1198 m->m_pkthdr.rcvif = ifp; 1199 if ( rx->flags & NETRXF_data_validated ) { 1200 /* Tell the stack the checksums are okay */ 1201 /* 1202 * XXX this isn't necessarily the case - need to add 1203 * check 1204 */ 1205 1206 m->m_pkthdr.csum_flags |= 1207 (CSUM_IP_CHECKED | CSUM_IP_VALID | CSUM_DATA_VALID 1208 | CSUM_PSEUDO_HDR); 1209 m->m_pkthdr.csum_data = 0xffff; 1210 } 1211 if ((rx->flags & NETRXF_extra_info) != 0 && 1212 (extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].type == 1213 XEN_NETIF_EXTRA_TYPE_GSO)) { 1214 m->m_pkthdr.tso_segsz = 1215 extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].u.gso.size; 1216 m->m_pkthdr.csum_flags |= CSUM_TSO; 1217 } 1218 1219 rxq->stats.rx_packets++; 1220 rxq->stats.rx_bytes += m->m_pkthdr.len; 1221 1222 (void )mbufq_enqueue(&mbufq_rxq, m); 1223 rxq->ring.rsp_cons = i; 1224 } 1225 1226 mbufq_drain(&mbufq_errq); 1227 1228 /* 1229 * Process all the mbufs after the remapping is complete. 1230 * Break the mbuf chain first though. 1231 */ 1232 while ((m = mbufq_dequeue(&mbufq_rxq)) != NULL) { 1233 if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); 1234 1235 /* XXX: Do we really need to drop the rx lock? 
*/ 1236 XN_RX_UNLOCK(rxq); 1237 #if (defined(INET) || defined(INET6)) 1238 /* Use LRO if possible */ 1239 if ((ifp->if_capenable & IFCAP_LRO) == 0 || 1240 lro->lro_cnt == 0 || tcp_lro_rx(lro, m, 0)) { 1241 /* 1242 * If LRO fails, pass up to the stack 1243 * directly. 1244 */ 1245 (*ifp->if_input)(ifp, m); 1246 } 1247 #else 1248 (*ifp->if_input)(ifp, m); 1249 #endif 1250 1251 XN_RX_LOCK(rxq); 1252 } 1253 1254 rxq->ring.rsp_cons = i; 1255 1256 #if (defined(INET) || defined(INET6)) 1257 /* 1258 * Flush any outstanding LRO work 1259 */ 1260 tcp_lro_flush_all(lro); 1261 #endif 1262 1263 xn_alloc_rx_buffers(rxq); 1264 1265 RING_FINAL_CHECK_FOR_RESPONSES(&rxq->ring, work_to_do); 1266 } while (work_to_do); 1267 } 1268 1269 static void 1270 xn_txeof(struct netfront_txq *txq) 1271 { 1272 RING_IDX i, prod; 1273 unsigned short id; 1274 struct ifnet *ifp; 1275 netif_tx_response_t *txr; 1276 struct mbuf *m; 1277 struct netfront_info *np = txq->info; 1278 1279 XN_TX_LOCK_ASSERT(txq); 1280 1281 if (!netfront_carrier_ok(np)) 1282 return; 1283 1284 ifp = np->xn_ifp; 1285 1286 do { 1287 prod = txq->ring.sring->rsp_prod; 1288 rmb(); /* Ensure we see responses up to 'rp'. */ 1289 1290 for (i = txq->ring.rsp_cons; i != prod; i++) { 1291 txr = RING_GET_RESPONSE(&txq->ring, i); 1292 if (txr->status == NETIF_RSP_NULL) 1293 continue; 1294 1295 if (txr->status != NETIF_RSP_OKAY) { 1296 printf("%s: WARNING: response is %d!\n", 1297 __func__, txr->status); 1298 } 1299 id = txr->id; 1300 m = txq->mbufs[id]; 1301 KASSERT(m != NULL, ("mbuf not found in chain")); 1302 KASSERT((uintptr_t)m > NET_TX_RING_SIZE, 1303 ("mbuf already on the free list, but we're " 1304 "trying to free it again!")); 1305 M_ASSERTVALID(m); 1306 1307 /* 1308 * Increment packet count if this is the last 1309 * mbuf of the chain. 1310 */ 1311 if (!m->m_next) 1312 if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); 1313 if (__predict_false(gnttab_query_foreign_access( 1314 txq->grant_ref[id]) != 0)) { 1315 panic("%s: grant id %u still in use by the " 1316 "backend", __func__, id); 1317 } 1318 gnttab_end_foreign_access_ref(txq->grant_ref[id]); 1319 gnttab_release_grant_reference( 1320 &txq->gref_head, txq->grant_ref[id]); 1321 txq->grant_ref[id] = GRANT_REF_INVALID; 1322 1323 txq->mbufs[id] = NULL; 1324 add_id_to_freelist(txq->mbufs, id); 1325 txq->mbufs_cnt--; 1326 m_free(m); 1327 /* Only mark the txq active if we've freed up at least one slot to try */ 1328 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; 1329 } 1330 txq->ring.rsp_cons = prod; 1331 1332 /* 1333 * Set a new event, then check for race with update of 1334 * tx_cons. Note that it is essential to schedule a 1335 * callback, no matter how few buffers are pending. Even if 1336 * there is space in the transmit ring, higher layers may 1337 * be blocked because too much data is outstanding: in such 1338 * cases notification from Xen is likely to be the only kick 1339 * that we'll get. 
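		 *
		 * The threshold written below sits roughly halfway between
		 * the responses just consumed and the current request
		 * producer, so the backend notifies us again once about half
		 * of the outstanding requests have completed.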
		 */
		txq->ring.sring->rsp_event =
		    prod + ((txq->ring.sring->req_prod - prod) >> 1) + 1;

		mb();
	} while (prod != txq->ring.sring->rsp_prod);

	if (txq->full &&
	    ((txq->ring.sring->req_prod - prod) < NET_TX_RING_SIZE)) {
		txq->full = false;
		xn_txq_start(txq);
	}
}

static void
xn_intr(void *xsc)
{
	struct netfront_txq *txq = xsc;
	struct netfront_info *np = txq->info;
	struct netfront_rxq *rxq = &np->rxq[txq->id];

	/* kick both tx and rx */
	xn_rxq_intr(rxq);
	xn_txq_intr(txq);
}

static void
xn_move_rx_slot(struct netfront_rxq *rxq, struct mbuf *m,
    grant_ref_t ref)
{
	int new = xn_rxidx(rxq->ring.req_prod_pvt);

	KASSERT(rxq->mbufs[new] == NULL, ("mbufs != NULL"));
	rxq->mbufs[new] = m;
	rxq->grant_ref[new] = ref;
	RING_GET_REQUEST(&rxq->ring, rxq->ring.req_prod_pvt)->id = new;
	RING_GET_REQUEST(&rxq->ring, rxq->ring.req_prod_pvt)->gref = ref;
	rxq->ring.req_prod_pvt++;
}

static int
xn_get_extras(struct netfront_rxq *rxq,
    struct netif_extra_info *extras, RING_IDX rp, RING_IDX *cons)
{
	struct netif_extra_info *extra;

	int err = 0;

	do {
		struct mbuf *m;
		grant_ref_t ref;

		if (__predict_false(*cons + 1 == rp)) {
			err = EINVAL;
			break;
		}

		extra = (struct netif_extra_info *)
		    RING_GET_RESPONSE(&rxq->ring, ++(*cons));

		if (__predict_false(!extra->type ||
		    extra->type >= XEN_NETIF_EXTRA_TYPE_MAX)) {
			err = EINVAL;
		} else {
			memcpy(&extras[extra->type - 1], extra, sizeof(*extra));
		}

		m = xn_get_rx_mbuf(rxq, *cons);
		ref = xn_get_rx_ref(rxq, *cons);
		xn_move_rx_slot(rxq, m, ref);
	} while (extra->flags & XEN_NETIF_EXTRA_FLAG_MORE);

	return err;
}

static int
xn_get_responses(struct netfront_rxq *rxq,
    struct netfront_rx_info *rinfo, RING_IDX rp, RING_IDX *cons,
    struct mbuf **list)
{
	struct netif_rx_response *rx = &rinfo->rx;
	struct netif_extra_info *extras = rinfo->extras;
	struct mbuf *m, *m0, *m_prev;
	grant_ref_t ref = xn_get_rx_ref(rxq, *cons);
	RING_IDX ref_cons = *cons;
	int frags = 1;
	int err = 0;
	u_long ret;

	m0 = m = m_prev = xn_get_rx_mbuf(rxq, *cons);

	if (rx->flags & NETRXF_extra_info) {
		err = xn_get_extras(rxq, extras, rp, cons);
	}

	if (m0 != NULL) {
		m0->m_pkthdr.len = 0;
		m0->m_next = NULL;
	}

	for (;;) {
#if 0
		DPRINTK("rx->status=%hd rx->offset=%hu frags=%u\n",
			rx->status, rx->offset, frags);
#endif
		if (__predict_false(rx->status < 0 ||
		    rx->offset + rx->status > PAGE_SIZE)) {

			xn_move_rx_slot(rxq, m, ref);
			if (m0 == m)
				m0 = NULL;
			m = NULL;
			err = EINVAL;
			goto next_skip_queue;
		}

		/*
		 * This definitely indicates a bug, either in this driver or in
		 * the backend driver. In future this should flag the bad
		 * situation to the system controller to reboot the backend.
1460 */ 1461 if (ref == GRANT_REF_INVALID) { 1462 printf("%s: Bad rx response id %d.\n", __func__, rx->id); 1463 err = EINVAL; 1464 goto next; 1465 } 1466 1467 ret = gnttab_end_foreign_access_ref(ref); 1468 KASSERT(ret, ("Unable to end access to grant references")); 1469 1470 gnttab_release_grant_reference(&rxq->gref_head, ref); 1471 1472 next: 1473 if (m == NULL) 1474 break; 1475 1476 m->m_len = rx->status; 1477 m->m_data += rx->offset; 1478 m0->m_pkthdr.len += rx->status; 1479 1480 next_skip_queue: 1481 if (!(rx->flags & NETRXF_more_data)) 1482 break; 1483 1484 if (*cons + frags == rp) { 1485 if (net_ratelimit()) 1486 WPRINTK("Need more frags\n"); 1487 err = ENOENT; 1488 printf("%s: cons %u frags %u rp %u, not enough frags\n", 1489 __func__, *cons, frags, rp); 1490 break; 1491 } 1492 /* 1493 * Note that m can be NULL, if rx->status < 0 or if 1494 * rx->offset + rx->status > PAGE_SIZE above. 1495 */ 1496 m_prev = m; 1497 1498 rx = RING_GET_RESPONSE(&rxq->ring, *cons + frags); 1499 m = xn_get_rx_mbuf(rxq, *cons + frags); 1500 1501 /* 1502 * m_prev == NULL can happen if rx->status < 0 or if 1503 * rx->offset + * rx->status > PAGE_SIZE above. 1504 */ 1505 if (m_prev != NULL) 1506 m_prev->m_next = m; 1507 1508 /* 1509 * m0 can be NULL if rx->status < 0 or if * rx->offset + 1510 * rx->status > PAGE_SIZE above. 1511 */ 1512 if (m0 == NULL) 1513 m0 = m; 1514 m->m_next = NULL; 1515 ref = xn_get_rx_ref(rxq, *cons + frags); 1516 ref_cons = *cons + frags; 1517 frags++; 1518 } 1519 *list = m0; 1520 *cons += frags; 1521 1522 return (err); 1523 } 1524 1525 /** 1526 * \brief Count the number of fragments in an mbuf chain. 1527 * 1528 * Surprisingly, there isn't an M* macro for this. 1529 */ 1530 static inline int 1531 xn_count_frags(struct mbuf *m) 1532 { 1533 int nfrags; 1534 1535 for (nfrags = 0; m != NULL; m = m->m_next) 1536 nfrags++; 1537 1538 return (nfrags); 1539 } 1540 1541 /** 1542 * Given an mbuf chain, make sure we have enough room and then push 1543 * it onto the transmit ring. 1544 */ 1545 static int 1546 xn_assemble_tx_request(struct netfront_txq *txq, struct mbuf *m_head) 1547 { 1548 struct mbuf *m; 1549 struct netfront_info *np = txq->info; 1550 struct ifnet *ifp = np->xn_ifp; 1551 u_int nfrags; 1552 int otherend_id; 1553 1554 /** 1555 * Defragment the mbuf if necessary. 1556 */ 1557 nfrags = xn_count_frags(m_head); 1558 1559 /* 1560 * Check to see whether this request is longer than netback 1561 * can handle, and try to defrag it. 1562 */ 1563 /** 1564 * It is a bit lame, but the netback driver in Linux can't 1565 * deal with nfrags > MAX_TX_REQ_FRAGS, which is a quirk of 1566 * the Linux network stack. 1567 */ 1568 if (nfrags > np->maxfrags) { 1569 m = m_defrag(m_head, M_NOWAIT); 1570 if (!m) { 1571 /* 1572 * Defrag failed, so free the mbuf and 1573 * therefore drop the packet. 1574 */ 1575 m_freem(m_head); 1576 return (EMSGSIZE); 1577 } 1578 m_head = m; 1579 } 1580 1581 /* Determine how many fragments now exist */ 1582 nfrags = xn_count_frags(m_head); 1583 1584 /* 1585 * Check to see whether the defragmented packet has too many 1586 * segments for the Linux netback driver. 1587 */ 1588 /** 1589 * The FreeBSD TCP stack, with TSO enabled, can produce a chain 1590 * of mbufs longer than Linux can handle. Make sure we don't 1591 * pass a too-long chain over to the other side by dropping the 1592 * packet. It doesn't look like there is currently a way to 1593 * tell the TCP stack to generate a shorter chain of packets. 
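	 *
	 * (create_netdev() does advertise if_hw_tsomaxsegcount as
	 * MAX_TX_REQ_FRAGS, so with a well-behaved stack this drop path
	 * should be rare.)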
1594 */ 1595 if (nfrags > MAX_TX_REQ_FRAGS) { 1596 #ifdef DEBUG 1597 printf("%s: nfrags %d > MAX_TX_REQ_FRAGS %d, netback " 1598 "won't be able to handle it, dropping\n", 1599 __func__, nfrags, MAX_TX_REQ_FRAGS); 1600 #endif 1601 m_freem(m_head); 1602 return (EMSGSIZE); 1603 } 1604 1605 /* 1606 * This check should be redundant. We've already verified that we 1607 * have enough slots in the ring to handle a packet of maximum 1608 * size, and that our packet is less than the maximum size. Keep 1609 * it in here as an assert for now just to make certain that 1610 * chain_cnt is accurate. 1611 */ 1612 KASSERT((txq->mbufs_cnt + nfrags) <= NET_TX_RING_SIZE, 1613 ("%s: chain_cnt (%d) + nfrags (%d) > NET_TX_RING_SIZE " 1614 "(%d)!", __func__, (int) txq->mbufs_cnt, 1615 (int) nfrags, (int) NET_TX_RING_SIZE)); 1616 1617 /* 1618 * Start packing the mbufs in this chain into 1619 * the fragment pointers. Stop when we run out 1620 * of fragments or hit the end of the mbuf chain. 1621 */ 1622 m = m_head; 1623 otherend_id = xenbus_get_otherend_id(np->xbdev); 1624 for (m = m_head; m; m = m->m_next) { 1625 netif_tx_request_t *tx; 1626 uintptr_t id; 1627 grant_ref_t ref; 1628 u_long mfn; /* XXX Wrong type? */ 1629 1630 tx = RING_GET_REQUEST(&txq->ring, txq->ring.req_prod_pvt); 1631 id = get_id_from_freelist(txq->mbufs); 1632 if (id == 0) 1633 panic("%s: was allocated the freelist head!\n", 1634 __func__); 1635 txq->mbufs_cnt++; 1636 if (txq->mbufs_cnt > NET_TX_RING_SIZE) 1637 panic("%s: tx_chain_cnt must be <= NET_TX_RING_SIZE\n", 1638 __func__); 1639 txq->mbufs[id] = m; 1640 tx->id = id; 1641 ref = gnttab_claim_grant_reference(&txq->gref_head); 1642 KASSERT((short)ref >= 0, ("Negative ref")); 1643 mfn = virt_to_mfn(mtod(m, vm_offset_t)); 1644 gnttab_grant_foreign_access_ref(ref, otherend_id, 1645 mfn, GNTMAP_readonly); 1646 tx->gref = txq->grant_ref[id] = ref; 1647 tx->offset = mtod(m, vm_offset_t) & (PAGE_SIZE - 1); 1648 tx->flags = 0; 1649 if (m == m_head) { 1650 /* 1651 * The first fragment has the entire packet 1652 * size, subsequent fragments have just the 1653 * fragment size. The backend works out the 1654 * true size of the first fragment by 1655 * subtracting the sizes of the other 1656 * fragments. 1657 */ 1658 tx->size = m->m_pkthdr.len; 1659 1660 /* 1661 * The first fragment contains the checksum flags 1662 * and is optionally followed by extra data for 1663 * TSO etc. 1664 */ 1665 /** 1666 * CSUM_TSO requires checksum offloading. 1667 * Some versions of FreeBSD fail to 1668 * set CSUM_TCP in the CSUM_TSO case, 1669 * so we have to test for CSUM_TSO 1670 * explicitly. 
1671 */ 1672 if (m->m_pkthdr.csum_flags 1673 & (CSUM_DELAY_DATA | CSUM_TSO)) { 1674 tx->flags |= (NETTXF_csum_blank 1675 | NETTXF_data_validated); 1676 } 1677 if (m->m_pkthdr.csum_flags & CSUM_TSO) { 1678 struct netif_extra_info *gso = 1679 (struct netif_extra_info *) 1680 RING_GET_REQUEST(&txq->ring, 1681 ++txq->ring.req_prod_pvt); 1682 1683 tx->flags |= NETTXF_extra_info; 1684 1685 gso->u.gso.size = m->m_pkthdr.tso_segsz; 1686 gso->u.gso.type = 1687 XEN_NETIF_GSO_TYPE_TCPV4; 1688 gso->u.gso.pad = 0; 1689 gso->u.gso.features = 0; 1690 1691 gso->type = XEN_NETIF_EXTRA_TYPE_GSO; 1692 gso->flags = 0; 1693 } 1694 } else { 1695 tx->size = m->m_len; 1696 } 1697 if (m->m_next) 1698 tx->flags |= NETTXF_more_data; 1699 1700 txq->ring.req_prod_pvt++; 1701 } 1702 BPF_MTAP(ifp, m_head); 1703 1704 xn_txeof(txq); 1705 1706 txq->stats.tx_bytes += m_head->m_pkthdr.len; 1707 txq->stats.tx_packets++; 1708 1709 return (0); 1710 } 1711 1712 /* equivalent of network_open() in Linux */ 1713 static void 1714 xn_ifinit_locked(struct netfront_info *np) 1715 { 1716 struct ifnet *ifp; 1717 int i; 1718 struct netfront_rxq *rxq; 1719 1720 XN_LOCK_ASSERT(np); 1721 1722 ifp = np->xn_ifp; 1723 1724 if (ifp->if_drv_flags & IFF_DRV_RUNNING || !netfront_carrier_ok(np)) 1725 return; 1726 1727 xn_stop(np); 1728 1729 for (i = 0; i < np->num_queues; i++) { 1730 rxq = &np->rxq[i]; 1731 XN_RX_LOCK(rxq); 1732 xn_alloc_rx_buffers(rxq); 1733 rxq->ring.sring->rsp_event = rxq->ring.rsp_cons + 1; 1734 if (RING_HAS_UNCONSUMED_RESPONSES(&rxq->ring)) 1735 xn_rxeof(rxq); 1736 XN_RX_UNLOCK(rxq); 1737 } 1738 1739 ifp->if_drv_flags |= IFF_DRV_RUNNING; 1740 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; 1741 if_link_state_change(ifp, LINK_STATE_UP); 1742 } 1743 1744 static void 1745 xn_ifinit(void *xsc) 1746 { 1747 struct netfront_info *sc = xsc; 1748 1749 XN_LOCK(sc); 1750 xn_ifinit_locked(sc); 1751 XN_UNLOCK(sc); 1752 } 1753 1754 static int 1755 xn_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) 1756 { 1757 struct netfront_info *sc = ifp->if_softc; 1758 struct ifreq *ifr = (struct ifreq *) data; 1759 device_t dev; 1760 #ifdef INET 1761 struct ifaddr *ifa = (struct ifaddr *)data; 1762 #endif 1763 int mask, error = 0, reinit; 1764 1765 dev = sc->xbdev; 1766 1767 switch(cmd) { 1768 case SIOCSIFADDR: 1769 #ifdef INET 1770 XN_LOCK(sc); 1771 if (ifa->ifa_addr->sa_family == AF_INET) { 1772 ifp->if_flags |= IFF_UP; 1773 if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) 1774 xn_ifinit_locked(sc); 1775 arp_ifinit(ifp, ifa); 1776 XN_UNLOCK(sc); 1777 } else { 1778 XN_UNLOCK(sc); 1779 #endif 1780 error = ether_ioctl(ifp, cmd, data); 1781 #ifdef INET 1782 } 1783 #endif 1784 break; 1785 case SIOCSIFMTU: 1786 ifp->if_mtu = ifr->ifr_mtu; 1787 ifp->if_drv_flags &= ~IFF_DRV_RUNNING; 1788 xn_ifinit(sc); 1789 break; 1790 case SIOCSIFFLAGS: 1791 XN_LOCK(sc); 1792 if (ifp->if_flags & IFF_UP) { 1793 /* 1794 * If only the state of the PROMISC flag changed, 1795 * then just use the 'set promisc mode' command 1796 * instead of reinitializing the entire NIC. Doing 1797 * a full re-init means reloading the firmware and 1798 * waiting for it to start up, which may take a 1799 * second or two. 
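			 *
			 * (In this driver there is no separate promiscuous
			 * fast path: the branch below simply re-runs
			 * xn_ifinit_locked() unconditionally.)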
1800 */ 1801 xn_ifinit_locked(sc); 1802 } else { 1803 if (ifp->if_drv_flags & IFF_DRV_RUNNING) { 1804 xn_stop(sc); 1805 } 1806 } 1807 sc->xn_if_flags = ifp->if_flags; 1808 XN_UNLOCK(sc); 1809 break; 1810 case SIOCSIFCAP: 1811 mask = ifr->ifr_reqcap ^ ifp->if_capenable; 1812 reinit = 0; 1813 1814 if (mask & IFCAP_TXCSUM) { 1815 ifp->if_capenable ^= IFCAP_TXCSUM; 1816 ifp->if_hwassist ^= XN_CSUM_FEATURES; 1817 } 1818 if (mask & IFCAP_TSO4) { 1819 ifp->if_capenable ^= IFCAP_TSO4; 1820 ifp->if_hwassist ^= CSUM_TSO; 1821 } 1822 1823 if (mask & (IFCAP_RXCSUM | IFCAP_LRO)) { 1824 /* These Rx features require us to renegotiate. */ 1825 reinit = 1; 1826 1827 if (mask & IFCAP_RXCSUM) 1828 ifp->if_capenable ^= IFCAP_RXCSUM; 1829 if (mask & IFCAP_LRO) 1830 ifp->if_capenable ^= IFCAP_LRO; 1831 } 1832 1833 if (reinit == 0) 1834 break; 1835 1836 /* 1837 * We must reset the interface so the backend picks up the 1838 * new features. 1839 */ 1840 device_printf(sc->xbdev, 1841 "performing interface reset due to feature change\n"); 1842 XN_LOCK(sc); 1843 netfront_carrier_off(sc); 1844 sc->xn_reset = true; 1845 /* 1846 * NB: the pending packet queue is not flushed, since 1847 * the interface should still support the old options. 1848 */ 1849 XN_UNLOCK(sc); 1850 /* 1851 * Delete the xenstore nodes that export features. 1852 * 1853 * NB: There's a xenbus state called 1854 * "XenbusStateReconfiguring", which is what we should set 1855 * here. Sadly none of the backends know how to handle it, 1856 * and simply disconnect from the frontend, so we will just 1857 * switch back to XenbusStateInitialising in order to force 1858 * a reconnection. 1859 */ 1860 xs_rm(XST_NIL, xenbus_get_node(dev), "feature-gso-tcpv4"); 1861 xs_rm(XST_NIL, xenbus_get_node(dev), "feature-no-csum-offload"); 1862 xenbus_set_state(dev, XenbusStateClosing); 1863 1864 /* 1865 * Wait for the frontend to reconnect before returning 1866 * from the ioctl. 30s should be more than enough for any 1867 * sane backend to reconnect. 
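		 *
		 * The matching wakeup() is issued at the end of xn_connect()
		 * once the carrier has been turned back on.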
1868 */ 1869 error = tsleep(sc, 0, "xn_rst", 30*hz); 1870 break; 1871 case SIOCADDMULTI: 1872 case SIOCDELMULTI: 1873 break; 1874 case SIOCSIFMEDIA: 1875 case SIOCGIFMEDIA: 1876 error = ifmedia_ioctl(ifp, ifr, &sc->sc_media, cmd); 1877 break; 1878 default: 1879 error = ether_ioctl(ifp, cmd, data); 1880 } 1881 1882 return (error); 1883 } 1884 1885 static void 1886 xn_stop(struct netfront_info *sc) 1887 { 1888 struct ifnet *ifp; 1889 1890 XN_LOCK_ASSERT(sc); 1891 1892 ifp = sc->xn_ifp; 1893 1894 ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); 1895 if_link_state_change(ifp, LINK_STATE_DOWN); 1896 } 1897 1898 static void 1899 xn_rebuild_rx_bufs(struct netfront_rxq *rxq) 1900 { 1901 int requeue_idx, i; 1902 grant_ref_t ref; 1903 netif_rx_request_t *req; 1904 1905 for (requeue_idx = 0, i = 0; i < NET_RX_RING_SIZE; i++) { 1906 struct mbuf *m; 1907 u_long pfn; 1908 1909 if (rxq->mbufs[i] == NULL) 1910 continue; 1911 1912 m = rxq->mbufs[requeue_idx] = xn_get_rx_mbuf(rxq, i); 1913 ref = rxq->grant_ref[requeue_idx] = xn_get_rx_ref(rxq, i); 1914 1915 req = RING_GET_REQUEST(&rxq->ring, requeue_idx); 1916 pfn = vtophys(mtod(m, vm_offset_t)) >> PAGE_SHIFT; 1917 1918 gnttab_grant_foreign_access_ref(ref, 1919 xenbus_get_otherend_id(rxq->info->xbdev), 1920 pfn, 0); 1921 1922 req->gref = ref; 1923 req->id = requeue_idx; 1924 1925 requeue_idx++; 1926 } 1927 1928 rxq->ring.req_prod_pvt = requeue_idx; 1929 } 1930 1931 /* START of Xenolinux helper functions adapted to FreeBSD */ 1932 static int 1933 xn_connect(struct netfront_info *np) 1934 { 1935 int i, error; 1936 u_int feature_rx_copy; 1937 struct netfront_rxq *rxq; 1938 struct netfront_txq *txq; 1939 1940 error = xs_scanf(XST_NIL, xenbus_get_otherend_path(np->xbdev), 1941 "feature-rx-copy", NULL, "%u", &feature_rx_copy); 1942 if (error != 0) 1943 feature_rx_copy = 0; 1944 1945 /* We only support rx copy. */ 1946 if (!feature_rx_copy) 1947 return (EPROTONOSUPPORT); 1948 1949 /* Recovery procedure: */ 1950 error = talk_to_backend(np->xbdev, np); 1951 if (error != 0) 1952 return (error); 1953 1954 /* Step 1: Reinitialise variables. */ 1955 xn_query_features(np); 1956 xn_configure_features(np); 1957 1958 /* Step 2: Release TX buffer */ 1959 for (i = 0; i < np->num_queues; i++) { 1960 txq = &np->txq[i]; 1961 xn_release_tx_bufs(txq); 1962 } 1963 1964 /* Step 3: Rebuild the RX buffer freelist and the RX ring itself. */ 1965 for (i = 0; i < np->num_queues; i++) { 1966 rxq = &np->rxq[i]; 1967 xn_rebuild_rx_bufs(rxq); 1968 } 1969 1970 /* Step 4: All public and private state should now be sane. Get 1971 * ready to start sending and receiving packets and give the driver 1972 * domain a kick because we've probably just requeued some 1973 * packets. 
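	 *
	 * (The actual ring kick is issued by xn_kick_rings(), called from
	 * netfront_backend_changed() right after the device switches to
	 * Connected.)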
1974 */ 1975 netfront_carrier_on(np); 1976 wakeup(np); 1977 1978 return (0); 1979 } 1980 1981 static void 1982 xn_kick_rings(struct netfront_info *np) 1983 { 1984 struct netfront_rxq *rxq; 1985 struct netfront_txq *txq; 1986 int i; 1987 1988 for (i = 0; i < np->num_queues; i++) { 1989 txq = &np->txq[i]; 1990 rxq = &np->rxq[i]; 1991 xen_intr_signal(txq->xen_intr_handle); 1992 XN_TX_LOCK(txq); 1993 xn_txeof(txq); 1994 XN_TX_UNLOCK(txq); 1995 XN_RX_LOCK(rxq); 1996 xn_alloc_rx_buffers(rxq); 1997 XN_RX_UNLOCK(rxq); 1998 } 1999 } 2000 2001 static void 2002 xn_query_features(struct netfront_info *np) 2003 { 2004 int val; 2005 2006 device_printf(np->xbdev, "backend features:"); 2007 2008 if (xs_scanf(XST_NIL, xenbus_get_otherend_path(np->xbdev), 2009 "feature-sg", NULL, "%d", &val) != 0) 2010 val = 0; 2011 2012 np->maxfrags = 1; 2013 if (val) { 2014 np->maxfrags = MAX_TX_REQ_FRAGS; 2015 printf(" feature-sg"); 2016 } 2017 2018 if (xs_scanf(XST_NIL, xenbus_get_otherend_path(np->xbdev), 2019 "feature-gso-tcpv4", NULL, "%d", &val) != 0) 2020 val = 0; 2021 2022 np->xn_ifp->if_capabilities &= ~(IFCAP_TSO4|IFCAP_LRO); 2023 if (val) { 2024 np->xn_ifp->if_capabilities |= IFCAP_TSO4|IFCAP_LRO; 2025 printf(" feature-gso-tcp4"); 2026 } 2027 2028 /* 2029 * HW CSUM offload is assumed to be available unless 2030 * feature-no-csum-offload is set in xenstore. 2031 */ 2032 if (xs_scanf(XST_NIL, xenbus_get_otherend_path(np->xbdev), 2033 "feature-no-csum-offload", NULL, "%d", &val) != 0) 2034 val = 0; 2035 2036 np->xn_ifp->if_capabilities |= IFCAP_HWCSUM; 2037 if (val) { 2038 np->xn_ifp->if_capabilities &= ~(IFCAP_HWCSUM); 2039 printf(" feature-no-csum-offload"); 2040 } 2041 2042 printf("\n"); 2043 } 2044 2045 static int 2046 xn_configure_features(struct netfront_info *np) 2047 { 2048 int err, cap_enabled; 2049 #if (defined(INET) || defined(INET6)) 2050 int i; 2051 #endif 2052 struct ifnet *ifp; 2053 2054 ifp = np->xn_ifp; 2055 err = 0; 2056 2057 if ((ifp->if_capenable & ifp->if_capabilities) == ifp->if_capenable) { 2058 /* Current options are available, no need to do anything. */ 2059 return (0); 2060 } 2061 2062 /* Try to preserve as many options as possible. 
*/ 2063 cap_enabled = ifp->if_capenable; 2064 ifp->if_capenable = ifp->if_hwassist = 0; 2065 2066 #if (defined(INET) || defined(INET6)) 2067 if ((cap_enabled & IFCAP_LRO) != 0) 2068 for (i = 0; i < np->num_queues; i++) 2069 tcp_lro_free(&np->rxq[i].lro); 2070 if (xn_enable_lro && 2071 (ifp->if_capabilities & cap_enabled & IFCAP_LRO) != 0) { 2072 ifp->if_capenable |= IFCAP_LRO; 2073 for (i = 0; i < np->num_queues; i++) { 2074 err = tcp_lro_init(&np->rxq[i].lro); 2075 if (err != 0) { 2076 device_printf(np->xbdev, 2077 "LRO initialization failed\n"); 2078 ifp->if_capenable &= ~IFCAP_LRO; 2079 break; 2080 } 2081 np->rxq[i].lro.ifp = ifp; 2082 } 2083 } 2084 if ((ifp->if_capabilities & cap_enabled & IFCAP_TSO4) != 0) { 2085 ifp->if_capenable |= IFCAP_TSO4; 2086 ifp->if_hwassist |= CSUM_TSO; 2087 } 2088 #endif 2089 if ((ifp->if_capabilities & cap_enabled & IFCAP_TXCSUM) != 0) { 2090 ifp->if_capenable |= IFCAP_TXCSUM; 2091 ifp->if_hwassist |= XN_CSUM_FEATURES; 2092 } 2093 if ((ifp->if_capabilities & cap_enabled & IFCAP_RXCSUM) != 0) 2094 ifp->if_capenable |= IFCAP_RXCSUM; 2095 2096 return (err); 2097 } 2098 2099 static int 2100 xn_txq_mq_start_locked(struct netfront_txq *txq, struct mbuf *m) 2101 { 2102 struct netfront_info *np; 2103 struct ifnet *ifp; 2104 struct buf_ring *br; 2105 int error, notify; 2106 2107 np = txq->info; 2108 br = txq->br; 2109 ifp = np->xn_ifp; 2110 error = 0; 2111 2112 XN_TX_LOCK_ASSERT(txq); 2113 2114 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || 2115 !netfront_carrier_ok(np)) { 2116 if (m != NULL) 2117 error = drbr_enqueue(ifp, br, m); 2118 return (error); 2119 } 2120 2121 if (m != NULL) { 2122 error = drbr_enqueue(ifp, br, m); 2123 if (error != 0) 2124 return (error); 2125 } 2126 2127 while ((m = drbr_peek(ifp, br)) != NULL) { 2128 if (!xn_tx_slot_available(txq)) { 2129 drbr_putback(ifp, br, m); 2130 break; 2131 } 2132 2133 error = xn_assemble_tx_request(txq, m); 2134 /* xn_assemble_tx_request always consumes the mbuf*/ 2135 if (error != 0) { 2136 drbr_advance(ifp, br); 2137 break; 2138 } 2139 2140 RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&txq->ring, notify); 2141 if (notify) 2142 xen_intr_signal(txq->xen_intr_handle); 2143 2144 drbr_advance(ifp, br); 2145 } 2146 2147 if (RING_FULL(&txq->ring)) 2148 txq->full = true; 2149 2150 return (0); 2151 } 2152 2153 static int 2154 xn_txq_mq_start(struct ifnet *ifp, struct mbuf *m) 2155 { 2156 struct netfront_info *np; 2157 struct netfront_txq *txq; 2158 int i, npairs, error; 2159 2160 np = ifp->if_softc; 2161 npairs = np->num_queues; 2162 2163 if (!netfront_carrier_ok(np)) 2164 return (ENOBUFS); 2165 2166 KASSERT(npairs != 0, ("called with 0 available queues")); 2167 2168 /* check if flowid is set */ 2169 if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) 2170 i = m->m_pkthdr.flowid % npairs; 2171 else 2172 i = curcpu % npairs; 2173 2174 txq = &np->txq[i]; 2175 2176 if (XN_TX_TRYLOCK(txq) != 0) { 2177 error = xn_txq_mq_start_locked(txq, m); 2178 XN_TX_UNLOCK(txq); 2179 } else { 2180 error = drbr_enqueue(ifp, txq->br, m); 2181 taskqueue_enqueue(txq->tq, &txq->defrtask); 2182 } 2183 2184 return (error); 2185 } 2186 2187 static void 2188 xn_qflush(struct ifnet *ifp) 2189 { 2190 struct netfront_info *np; 2191 struct netfront_txq *txq; 2192 struct mbuf *m; 2193 int i; 2194 2195 np = ifp->if_softc; 2196 2197 for (i = 0; i < np->num_queues; i++) { 2198 txq = &np->txq[i]; 2199 2200 XN_TX_LOCK(txq); 2201 while ((m = buf_ring_dequeue_sc(txq->br)) != NULL) 2202 m_freem(m); 2203 XN_TX_UNLOCK(txq); 2204 } 2205 2206 if_qflush(ifp); 2207 } 2208 2209 /** 
2210 * Create a network device. 2211 * @param dev Newbus device representing this virtual NIC. 2212 */ 2213 int 2214 create_netdev(device_t dev) 2215 { 2216 struct netfront_info *np; 2217 int err; 2218 struct ifnet *ifp; 2219 2220 np = device_get_softc(dev); 2221 2222 np->xbdev = dev; 2223 2224 mtx_init(&np->sc_lock, "xnsc", "netfront softc lock", MTX_DEF); 2225 2226 ifmedia_init(&np->sc_media, 0, xn_ifmedia_upd, xn_ifmedia_sts); 2227 ifmedia_add(&np->sc_media, IFM_ETHER|IFM_MANUAL, 0, NULL); 2228 ifmedia_set(&np->sc_media, IFM_ETHER|IFM_MANUAL); 2229 2230 err = xen_net_read_mac(dev, np->mac); 2231 if (err != 0) 2232 goto error; 2233 2234 /* Set up ifnet structure */ 2235 ifp = np->xn_ifp = if_alloc(IFT_ETHER); 2236 ifp->if_softc = np; 2237 if_initname(ifp, "xn", device_get_unit(dev)); 2238 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; 2239 ifp->if_ioctl = xn_ioctl; 2240 2241 ifp->if_transmit = xn_txq_mq_start; 2242 ifp->if_qflush = xn_qflush; 2243 2244 ifp->if_init = xn_ifinit; 2245 2246 ifp->if_hwassist = XN_CSUM_FEATURES; 2247 /* Enable all supported features at device creation. */ 2248 ifp->if_capenable = ifp->if_capabilities = 2249 IFCAP_HWCSUM|IFCAP_TSO4|IFCAP_LRO; 2250 ifp->if_hw_tsomax = 65536 - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN); 2251 ifp->if_hw_tsomaxsegcount = MAX_TX_REQ_FRAGS; 2252 ifp->if_hw_tsomaxsegsize = PAGE_SIZE; 2253 2254 ether_ifattach(ifp, np->mac); 2255 netfront_carrier_off(np); 2256 2257 return (0); 2258 2259 error: 2260 KASSERT(err != 0, ("Error path with no error code specified")); 2261 return (err); 2262 } 2263 2264 static int 2265 netfront_detach(device_t dev) 2266 { 2267 struct netfront_info *info = device_get_softc(dev); 2268 2269 DPRINTK("%s\n", xenbus_get_node(dev)); 2270 2271 netif_free(info); 2272 2273 return 0; 2274 } 2275 2276 static void 2277 netif_free(struct netfront_info *np) 2278 { 2279 2280 XN_LOCK(np); 2281 xn_stop(np); 2282 XN_UNLOCK(np); 2283 netif_disconnect_backend(np); 2284 ether_ifdetach(np->xn_ifp); 2285 free(np->rxq, M_DEVBUF); 2286 free(np->txq, M_DEVBUF); 2287 if_free(np->xn_ifp); 2288 np->xn_ifp = NULL; 2289 ifmedia_removeall(&np->sc_media); 2290 } 2291 2292 static void 2293 netif_disconnect_backend(struct netfront_info *np) 2294 { 2295 u_int i; 2296 2297 for (i = 0; i < np->num_queues; i++) { 2298 XN_RX_LOCK(&np->rxq[i]); 2299 XN_TX_LOCK(&np->txq[i]); 2300 } 2301 netfront_carrier_off(np); 2302 for (i = 0; i < np->num_queues; i++) { 2303 XN_RX_UNLOCK(&np->rxq[i]); 2304 XN_TX_UNLOCK(&np->txq[i]); 2305 } 2306 2307 for (i = 0; i < np->num_queues; i++) { 2308 disconnect_rxq(&np->rxq[i]); 2309 disconnect_txq(&np->txq[i]); 2310 } 2311 } 2312 2313 static int 2314 xn_ifmedia_upd(struct ifnet *ifp) 2315 { 2316 2317 return (0); 2318 } 2319 2320 static void 2321 xn_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr) 2322 { 2323 2324 ifmr->ifm_status = IFM_AVALID|IFM_ACTIVE; 2325 ifmr->ifm_active = IFM_ETHER|IFM_MANUAL; 2326 } 2327 2328 /* ** Driver registration ** */ 2329 static device_method_t netfront_methods[] = { 2330 /* Device interface */ 2331 DEVMETHOD(device_probe, netfront_probe), 2332 DEVMETHOD(device_attach, netfront_attach), 2333 DEVMETHOD(device_detach, netfront_detach), 2334 DEVMETHOD(device_shutdown, bus_generic_shutdown), 2335 DEVMETHOD(device_suspend, netfront_suspend), 2336 DEVMETHOD(device_resume, netfront_resume), 2337 2338 /* Xenbus interface */ 2339 DEVMETHOD(xenbus_otherend_changed, netfront_backend_changed), 2340 2341 DEVMETHOD_END 2342 }; 2343 2344 static driver_t netfront_driver = { 2345 "xn", 2346 
netfront_methods, 2347 sizeof(struct netfront_info), 2348 }; 2349 devclass_t netfront_devclass; 2350 2351 DRIVER_MODULE(xe, xenbusb_front, netfront_driver, netfront_devclass, NULL, 2352 NULL); 2353
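
/*
 * The DRIVER_MODULE() declaration above registers this driver with the
 * xenbusb_front bus, so netfront_probe() is consulted for each "vif" node
 * that the XenStore front-end bus enumerates.
 */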