/*-
 * Copyright (c) 2004-2006 Kip Macy
 * Copyright (c) 2015 Wei Liu <wei.liu2@citrix.com>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_inet.h"
#include "opt_inet6.h"

#include <sys/param.h>
#include <sys/sockio.h>
#include <sys/limits.h>
#include <sys/mbuf.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/kernel.h>
#include <sys/socket.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>

#include <net/if.h>
#include <net/if_var.h>
#include <net/if_arp.h>
#include <net/ethernet.h>
#include <net/if_media.h>
#include <net/bpf.h>
#include <net/if_types.h>

#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/if_ether.h>
#include <netinet/tcp.h>
#include <netinet/tcp_lro.h>

#include <vm/vm.h>
#include <vm/pmap.h>

#include <sys/bus.h>

#include <xen/xen-os.h>
#include <xen/hypervisor.h>
#include <xen/xen_intr.h>
#include <xen/gnttab.h>
#include <xen/interface/memory.h>
#include <xen/interface/io/netif.h>
#include <xen/xenbus/xenbusvar.h>

#include "xenbus_if.h"

/* Features supported by all backends.  TSO and LRO can be negotiated. */
#define XN_CSUM_FEATURES	(CSUM_TCP | CSUM_UDP)

#define NET_TX_RING_SIZE	__RING_SIZE((netif_tx_sring_t *)0, PAGE_SIZE)
#define NET_RX_RING_SIZE	__RING_SIZE((netif_rx_sring_t *)0, PAGE_SIZE)

#define NET_RX_SLOTS_MIN	(XEN_NETIF_NR_SLOTS_MIN + 1)

/*
 * Should the driver do LRO on the RX end?  This can be toggled on the fly,
 * but the interface must be reset (down/up) for it to take effect.
 */
static int xn_enable_lro = 1;
TUNABLE_INT("hw.xn.enable_lro", &xn_enable_lro);

/*
 * Number of pairs of queues.
 */
static unsigned long xn_num_queues = 4;
TUNABLE_ULONG("hw.xn.num_queues", &xn_num_queues);

/**
 * \brief The maximum allowed data fragments in a single transmit
 *        request.
 *
 * This limit is imposed by the backend driver.  We assume here that
 * we are dealing with a Linux driver domain and have set our limit
 * to mirror the Linux MAX_SKB_FRAGS constant.
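 *
 * With the usual 4 KiB PAGE_SIZE this works out to 18 fragments
 * (65536 / 4096 + 2); the exact value depends on the page size.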
 */
#define MAX_TX_REQ_FRAGS (65536 / PAGE_SIZE + 2)

#define RX_COPY_THRESHOLD 256

#define net_ratelimit() 0

struct netfront_rxq;
struct netfront_txq;
struct netfront_info;
struct netfront_rx_info;

static void xn_txeof(struct netfront_txq *);
static void xn_rxeof(struct netfront_rxq *);
static void xn_alloc_rx_buffers(struct netfront_rxq *);
static void xn_alloc_rx_buffers_callout(void *arg);

static void xn_release_rx_bufs(struct netfront_rxq *);
static void xn_release_tx_bufs(struct netfront_txq *);

static void xn_rxq_intr(struct netfront_rxq *);
static void xn_txq_intr(struct netfront_txq *);
static void xn_intr(void *);
static inline int xn_count_frags(struct mbuf *m);
static int xn_assemble_tx_request(struct netfront_txq *, struct mbuf *);
static int xn_ioctl(struct ifnet *, u_long, caddr_t);
static void xn_ifinit_locked(struct netfront_info *);
static void xn_ifinit(void *);
static void xn_stop(struct netfront_info *);
static void xn_query_features(struct netfront_info *np);
static int xn_configure_features(struct netfront_info *np);
static void netif_free(struct netfront_info *info);
static int netfront_detach(device_t dev);

static int xn_txq_mq_start_locked(struct netfront_txq *, struct mbuf *);
static int xn_txq_mq_start(struct ifnet *, struct mbuf *);

static int talk_to_backend(device_t dev, struct netfront_info *info);
static int create_netdev(device_t dev);
static void netif_disconnect_backend(struct netfront_info *info);
static int setup_device(device_t dev, struct netfront_info *info,
    unsigned long);
static int xn_ifmedia_upd(struct ifnet *ifp);
static void xn_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr);

int xn_connect(struct netfront_info *);

static int xn_get_responses(struct netfront_rxq *,
    struct netfront_rx_info *, RING_IDX, RING_IDX *,
    struct mbuf **);

#define virt_to_mfn(x) (vtophys(x) >> PAGE_SHIFT)

#define INVALID_P2M_ENTRY (~0UL)

struct xn_rx_stats
{
	u_long	rx_packets;	/* total packets received */
	u_long	rx_bytes;	/* total bytes received */
	u_long	rx_errors;	/* bad packets received */
};

struct xn_tx_stats
{
	u_long	tx_packets;	/* total packets transmitted */
	u_long	tx_bytes;	/* total bytes transmitted */
	u_long	tx_errors;	/* packet transmit problems */
};

#define XN_QUEUE_NAME_LEN	8	/* xn{t,r}x_%u, allow for two digits */
struct netfront_rxq {
	struct netfront_info	*info;
	u_int			id;
	char			name[XN_QUEUE_NAME_LEN];
	struct mtx		lock;

	int			ring_ref;
	netif_rx_front_ring_t	ring;
	xen_intr_handle_t	xen_intr_handle;

	grant_ref_t		gref_head;
	grant_ref_t		grant_ref[NET_TX_RING_SIZE + 1];

	struct mbuf		*mbufs[NET_RX_RING_SIZE + 1];

	struct lro_ctrl		lro;

	struct callout		rx_refill;

	struct xn_rx_stats	stats;
};

struct netfront_txq {
	struct netfront_info	*info;
	u_int			id;
	char			name[XN_QUEUE_NAME_LEN];
	struct mtx		lock;

	int			ring_ref;
	netif_tx_front_ring_t	ring;
	xen_intr_handle_t	xen_intr_handle;

	grant_ref_t		gref_head;
	grant_ref_t		grant_ref[NET_TX_RING_SIZE + 1];

	struct mbuf		*mbufs[NET_TX_RING_SIZE + 1];
	int			mbufs_cnt;
	struct buf_ring		*br;

	struct taskqueue	*tq;
	struct task		defrtask;

	bool			full;

	struct xn_tx_stats	stats;
};

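/*
 * Per-interface state (the device softc).  It owns the arrays of RX and
 * TX queue pairs and the fields shared by all of them.
 */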
struct netfront_info {
	struct ifnet		*xn_ifp;

	struct mtx		sc_lock;

	u_int			num_queues;
	struct netfront_rxq	*rxq;
	struct netfront_txq	*txq;

	u_int			carrier;
	u_int			maxfrags;

	device_t		xbdev;
	uint8_t			mac[ETHER_ADDR_LEN];

	int			xn_if_flags;

	struct ifmedia		sc_media;

	bool			xn_reset;
};

struct netfront_rx_info {
	struct netif_rx_response rx;
	struct netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1];
};

#define XN_RX_LOCK(_q)		mtx_lock(&(_q)->lock)
#define XN_RX_UNLOCK(_q)	mtx_unlock(&(_q)->lock)

#define XN_TX_LOCK(_q)		mtx_lock(&(_q)->lock)
#define XN_TX_TRYLOCK(_q)	mtx_trylock(&(_q)->lock)
#define XN_TX_UNLOCK(_q)	mtx_unlock(&(_q)->lock)

#define XN_LOCK(_sc)		mtx_lock(&(_sc)->sc_lock);
#define XN_UNLOCK(_sc)		mtx_unlock(&(_sc)->sc_lock);

#define XN_LOCK_ASSERT(_sc)	mtx_assert(&(_sc)->sc_lock, MA_OWNED);
#define XN_RX_LOCK_ASSERT(_q)	mtx_assert(&(_q)->lock, MA_OWNED);
#define XN_TX_LOCK_ASSERT(_q)	mtx_assert(&(_q)->lock, MA_OWNED);

#define netfront_carrier_on(netif)	((netif)->carrier = 1)
#define netfront_carrier_off(netif)	((netif)->carrier = 0)
#define netfront_carrier_ok(netif)	((netif)->carrier)

/* Access macros for acquiring/freeing slots in {tx,rx}q->mbufs[]. */

static inline void
add_id_to_freelist(struct mbuf **list, uintptr_t id)
{

	KASSERT(id != 0,
	    ("%s: the head item (0) must always be free.", __func__));
	list[id] = list[0];
	list[0]  = (struct mbuf *)id;
}

static inline unsigned short
get_id_from_freelist(struct mbuf **list)
{
	uintptr_t id;

	id = (uintptr_t)list[0];
	KASSERT(id != 0,
	    ("%s: the head item (0) must always remain free.", __func__));
	list[0] = list[id];
	return (id);
}

static inline int
xn_rxidx(RING_IDX idx)
{

	return idx & (NET_RX_RING_SIZE - 1);
}

static inline struct mbuf *
xn_get_rx_mbuf(struct netfront_rxq *rxq, RING_IDX ri)
{
	int i;
	struct mbuf *m;

	i = xn_rxidx(ri);
	m = rxq->mbufs[i];
	rxq->mbufs[i] = NULL;
	return (m);
}

static inline grant_ref_t
xn_get_rx_ref(struct netfront_rxq *rxq, RING_IDX ri)
{
	int i = xn_rxidx(ri);
	grant_ref_t ref = rxq->grant_ref[i];

	KASSERT(ref != GRANT_REF_INVALID, ("Invalid grant reference!\n"));
	rxq->grant_ref[i] = GRANT_REF_INVALID;
	return (ref);
}

#define IPRINTK(fmt, args...) \
    printf("[XEN] " fmt, ##args)
#ifdef INVARIANTS
#define WPRINTK(fmt, args...) \
    printf("[XEN] " fmt, ##args)
#else
#define WPRINTK(fmt, args...)
#endif
#ifdef DEBUG
#define DPRINTK(fmt, args...) \
    printf("[XEN] %s: " fmt, __func__, ##args)
#else
#define DPRINTK(fmt, args...)
#endif

/**
 * Read the 'mac' node at the given device's node in the store, and parse that
 * as colon-separated octets, placing the result in the given mac array.  mac
 * must be a preallocated array of length ETHER_ADDR_LEN.
 * Return 0 on success, or errno on error.
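 *
 * A typical value read from the store looks like "00:16:3e:xx:xx:xx"
 * (00:16:3e being the OUI commonly used for Xen guests).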
 */
static int
xen_net_read_mac(device_t dev, uint8_t mac[])
{
	int error, i;
	char *s, *e, *macstr;
	const char *path;

	path = xenbus_get_node(dev);
	error = xs_read(XST_NIL, path, "mac", NULL, (void **) &macstr);
	if (error == ENOENT) {
		/*
		 * Deal with missing mac XenStore nodes on devices with
		 * HVM emulation (the 'ioemu' configuration attribute)
		 * enabled.
		 *
		 * The HVM emulator may execute in a stub device model
		 * domain which lacks the permission, only given to Dom0,
		 * to update the guest's XenStore tree.  For this reason,
		 * the HVM emulator doesn't even attempt to write the
		 * front-side mac node, even when operating in Dom0.
		 * However, there should always be a mac listed in the
		 * backend tree.  Fall back to this version if our query
		 * of the front side XenStore location doesn't find
		 * anything.
		 */
		path = xenbus_get_otherend_path(dev);
		error = xs_read(XST_NIL, path, "mac", NULL, (void **) &macstr);
	}
	if (error != 0) {
		xenbus_dev_fatal(dev, error, "parsing %s/mac", path);
		return (error);
	}

	s = macstr;
	for (i = 0; i < ETHER_ADDR_LEN; i++) {
		mac[i] = strtoul(s, &e, 16);
		if (s == e || (e[0] != ':' && e[0] != 0)) {
			free(macstr, M_XENBUS);
			return (ENOENT);
		}
		s = &e[1];
	}
	free(macstr, M_XENBUS);
	return (0);
}

/**
 * Entry point to this code when a new device is created.  Allocate the basic
 * structures and the ring buffers for communication with the backend, and
 * inform the backend of the appropriate details for those.  Switch to
 * Connected state.
 */
static int
netfront_probe(device_t dev)
{

	if (xen_hvm_domain() && xen_disable_pv_nics != 0)
		return (ENXIO);

	if (!strcmp(xenbus_get_type(dev), "vif")) {
		device_set_desc(dev, "Virtual Network Interface");
		return (0);
	}

	return (ENXIO);
}

static int
netfront_attach(device_t dev)
{
	int err;

	err = create_netdev(dev);
	if (err != 0) {
		xenbus_dev_fatal(dev, err, "creating netdev");
		return (err);
	}

	SYSCTL_ADD_INT(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "enable_lro", CTLFLAG_RW,
	    &xn_enable_lro, 0, "Large Receive Offload");

	SYSCTL_ADD_ULONG(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "num_queues", CTLFLAG_RD,
	    &xn_num_queues, "Number of pairs of queues");

	return (0);
}

static int
netfront_suspend(device_t dev)
{
	struct netfront_info *np = device_get_softc(dev);
	u_int i;

	for (i = 0; i < np->num_queues; i++) {
		XN_RX_LOCK(&np->rxq[i]);
		XN_TX_LOCK(&np->txq[i]);
	}
	netfront_carrier_off(np);
	for (i = 0; i < np->num_queues; i++) {
		XN_RX_UNLOCK(&np->rxq[i]);
		XN_TX_UNLOCK(&np->txq[i]);
	}
	return (0);
}

/**
 * We are reconnecting to the backend, due to a suspend/resume, or a backend
 * driver restart.  We tear down our netif structure and recreate it, but
 * leave the device-layer structures intact so that this is transparent to the
 * rest of the kernel.
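 *
 * The reconnection itself is completed later: once the restarted backend
 * reaches XenbusStateInitWait, netfront_backend_changed() calls xn_connect()
 * to rebuild the rings and re-enable the carrier.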
 */
static int
netfront_resume(device_t dev)
{
	struct netfront_info *info = device_get_softc(dev);

	netif_disconnect_backend(info);
	return (0);
}

static int
write_queue_xenstore_keys(device_t dev,
    struct netfront_rxq *rxq,
    struct netfront_txq *txq,
    struct xs_transaction *xst, bool hierarchy)
{
	int err;
	const char *message;
	const char *node = xenbus_get_node(dev);
	char *path;
	size_t path_size;

	KASSERT(rxq->id == txq->id, ("Mismatch between RX and TX queue ids"));
	/* Split event channel support is not yet there. */
	KASSERT(rxq->xen_intr_handle == txq->xen_intr_handle,
	    ("Split event channels are not supported"));

	if (hierarchy) {
		path_size = strlen(node) + 10;
		path = malloc(path_size, M_DEVBUF, M_WAITOK|M_ZERO);
		snprintf(path, path_size, "%s/queue-%u", node, rxq->id);
	} else {
		path_size = strlen(node) + 1;
		path = malloc(path_size, M_DEVBUF, M_WAITOK|M_ZERO);
		snprintf(path, path_size, "%s", node);
	}

	err = xs_printf(*xst, path, "tx-ring-ref", "%u", txq->ring_ref);
	if (err != 0) {
		message = "writing tx ring-ref";
		goto error;
	}
	err = xs_printf(*xst, path, "rx-ring-ref", "%u", rxq->ring_ref);
	if (err != 0) {
		message = "writing rx ring-ref";
		goto error;
	}
	err = xs_printf(*xst, path, "event-channel", "%u",
	    xen_intr_port(rxq->xen_intr_handle));
	if (err != 0) {
		message = "writing event-channel";
		goto error;
	}

	free(path, M_DEVBUF);

	return (0);

error:
	free(path, M_DEVBUF);
	xenbus_dev_fatal(dev, err, "%s", message);

	return (err);
}

/* Common code used when first setting up, and when resuming. */
static int
talk_to_backend(device_t dev, struct netfront_info *info)
{
	const char *message;
	struct xs_transaction xst;
	const char *node = xenbus_get_node(dev);
	int err;
	unsigned long num_queues, max_queues = 0;
	unsigned int i;

	err = xen_net_read_mac(dev, info->mac);
	if (err != 0) {
		xenbus_dev_fatal(dev, err, "parsing %s/mac", node);
		goto out;
	}

	err = xs_scanf(XST_NIL, xenbus_get_otherend_path(info->xbdev),
	    "multi-queue-max-queues", NULL, "%lu", &max_queues);
	if (err != 0)
		max_queues = 1;
	num_queues = xn_num_queues;
	if (num_queues > max_queues)
		num_queues = max_queues;

	err = setup_device(dev, info, num_queues);
	if (err != 0)
		goto out;

again:
	err = xs_transaction_start(&xst);
	if (err != 0) {
		xenbus_dev_fatal(dev, err, "starting transaction");
		goto free;
	}

	if (info->num_queues == 1) {
		err = write_queue_xenstore_keys(dev, &info->rxq[0],
		    &info->txq[0], &xst, false);
		if (err != 0)
			goto abort_transaction_no_def_error;
	} else {
		err = xs_printf(xst, node, "multi-queue-num-queues",
		    "%u", info->num_queues);
		if (err != 0) {
			message = "writing multi-queue-num-queues";
			goto abort_transaction;
		}

		for (i = 0; i < info->num_queues; i++) {
			err = write_queue_xenstore_keys(dev, &info->rxq[i],
			    &info->txq[i], &xst, true);
			if (err != 0)
				goto abort_transaction_no_def_error;
		}
	}

	err = xs_printf(xst, node, "request-rx-copy", "%u", 1);
	if (err != 0) {
		message = "writing request-rx-copy";
		goto abort_transaction;
	}
	err = xs_printf(xst, node, "feature-rx-notify", "%d", 1);
	if (err != 0) {
		message = "writing feature-rx-notify";
		goto abort_transaction;
	}
	err = xs_printf(xst, node, "feature-sg", "%d", 1);
	if (err != 0) {
		message = "writing feature-sg";
		goto abort_transaction;
	}
	if ((info->xn_ifp->if_capenable & IFCAP_LRO) != 0) {
		err = xs_printf(xst, node, "feature-gso-tcpv4", "%d", 1);
		if (err != 0) {
			message = "writing feature-gso-tcpv4";
			goto abort_transaction;
		}
	}
	if ((info->xn_ifp->if_capenable & IFCAP_RXCSUM) == 0) {
		err = xs_printf(xst, node, "feature-no-csum-offload", "%d", 1);
		if (err != 0) {
			message = "writing feature-no-csum-offload";
			goto abort_transaction;
		}
	}

	err = xs_transaction_end(xst, 0);
	if (err != 0) {
		if (err == EAGAIN)
			goto again;
		xenbus_dev_fatal(dev, err, "completing transaction");
		goto free;
	}

	return (0);

abort_transaction:
	xenbus_dev_fatal(dev, err, "%s", message);
abort_transaction_no_def_error:
	xs_transaction_end(xst, 1);
free:
	netif_free(info);
out:
	return (err);
}

static void
xn_rxq_intr(struct netfront_rxq *rxq)
{

	XN_RX_LOCK(rxq);
	xn_rxeof(rxq);
	XN_RX_UNLOCK(rxq);
}

static void
xn_txq_start(struct netfront_txq *txq)
{
	struct netfront_info *np = txq->info;
	struct ifnet *ifp = np->xn_ifp;

	XN_TX_LOCK_ASSERT(txq);
	if (!drbr_empty(ifp, txq->br))
		xn_txq_mq_start_locked(txq, NULL);
}

static void
xn_txq_intr(struct netfront_txq *txq)
{

	XN_TX_LOCK(txq);
	if (RING_HAS_UNCONSUMED_RESPONSES(&txq->ring))
		xn_txeof(txq);
	xn_txq_start(txq);
	XN_TX_UNLOCK(txq);
}

static void
xn_txq_tq_deferred(void *xtxq, int pending)
{
	struct netfront_txq *txq = xtxq;

	XN_TX_LOCK(txq);
	xn_txq_start(txq);
	XN_TX_UNLOCK(txq);
}

static void
disconnect_rxq(struct netfront_rxq *rxq)
{

	xn_release_rx_bufs(rxq);
	gnttab_free_grant_references(rxq->gref_head);
	gnttab_end_foreign_access(rxq->ring_ref, NULL);
	/*
	 * No split event channel support at the moment; the handle will
	 * be unbound in the TX path, so there is no need to call
	 * xen_intr_unbind() here, but we do want to reset the handle to 0.
	 */
	rxq->xen_intr_handle = 0;
}

static void
destroy_rxq(struct netfront_rxq *rxq)
{

	callout_drain(&rxq->rx_refill);
	free(rxq->ring.sring, M_DEVBUF);
}

static void
destroy_rxqs(struct netfront_info *np)
{
	int i;

	for (i = 0; i < np->num_queues; i++)
		destroy_rxq(&np->rxq[i]);

	free(np->rxq, M_DEVBUF);
	np->rxq = NULL;
}

static int
setup_rxqs(device_t dev, struct netfront_info *info,
    unsigned long num_queues)
{
	int q, i;
	int error;
	netif_rx_sring_t *rxs;
	struct netfront_rxq *rxq;

	info->rxq = malloc(sizeof(struct netfront_rxq) * num_queues,
	    M_DEVBUF, M_WAITOK|M_ZERO);

	for (q = 0; q < num_queues; q++) {
		rxq = &info->rxq[q];

		rxq->id = q;
		rxq->info = info;
		rxq->ring_ref = GRANT_REF_INVALID;
		rxq->ring.sring = NULL;
		snprintf(rxq->name, XN_QUEUE_NAME_LEN, "xnrx_%u", q);
		mtx_init(&rxq->lock, rxq->name, "netfront receive lock",
		    MTX_DEF);

		for (i = 0; i <= NET_RX_RING_SIZE; i++) {
			rxq->mbufs[i] = NULL;
			rxq->grant_ref[i] = GRANT_REF_INVALID;
		}

		/* Start resource allocation. */

		if (gnttab_alloc_grant_references(NET_RX_RING_SIZE,
		    &rxq->gref_head) != 0) {
			device_printf(dev, "allocating rx gref");
			error = ENOMEM;
			goto fail;
		}

		rxs = (netif_rx_sring_t *)malloc(PAGE_SIZE, M_DEVBUF,
		    M_WAITOK|M_ZERO);
		SHARED_RING_INIT(rxs);
		FRONT_RING_INIT(&rxq->ring, rxs, PAGE_SIZE);

		error = xenbus_grant_ring(dev, virt_to_mfn(rxs),
		    &rxq->ring_ref);
		if (error != 0) {
			device_printf(dev, "granting rx ring page");
			goto fail_grant_ring;
		}

		callout_init(&rxq->rx_refill, 1);
	}

	return (0);

fail_grant_ring:
	gnttab_free_grant_references(rxq->gref_head);
	free(rxq->ring.sring, M_DEVBUF);
fail:
	for (; q >= 0; q--) {
		disconnect_rxq(&info->rxq[q]);
		destroy_rxq(&info->rxq[q]);
	}

	free(info->rxq, M_DEVBUF);
	return (error);
}

static void
disconnect_txq(struct netfront_txq *txq)
{

	xn_release_tx_bufs(txq);
	gnttab_free_grant_references(txq->gref_head);
	gnttab_end_foreign_access(txq->ring_ref, NULL);
	xen_intr_unbind(&txq->xen_intr_handle);
}

static void
destroy_txq(struct netfront_txq *txq)
{

	free(txq->ring.sring, M_DEVBUF);
	buf_ring_free(txq->br, M_DEVBUF);
	taskqueue_drain_all(txq->tq);
	taskqueue_free(txq->tq);
}

static void
destroy_txqs(struct netfront_info *np)
{
	int i;

	for (i = 0; i < np->num_queues; i++)
		destroy_txq(&np->txq[i]);

	free(np->txq, M_DEVBUF);
	np->txq = NULL;
}

static int
setup_txqs(device_t dev, struct netfront_info *info,
    unsigned long num_queues)
{
	int q, i;
	int error;
	netif_tx_sring_t *txs;
	struct netfront_txq *txq;

	info->txq = malloc(sizeof(struct netfront_txq) * num_queues,
	    M_DEVBUF, M_WAITOK|M_ZERO);

	for (q = 0; q < num_queues; q++) {
		txq = &info->txq[q];

		txq->id = q;
		txq->info = info;

		txq->ring_ref = GRANT_REF_INVALID;
		txq->ring.sring = NULL;

		snprintf(txq->name, XN_QUEUE_NAME_LEN, "xntx_%u", q);

		mtx_init(&txq->lock, txq->name, "netfront transmit lock",
		    MTX_DEF);

		for (i = 0; i <= NET_TX_RING_SIZE; i++) {
			txq->mbufs[i] = (void *) ((u_long) i+1);
			txq->grant_ref[i] = GRANT_REF_INVALID;
		}
		txq->mbufs[NET_TX_RING_SIZE] = (void *)0;

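		/*
		 * The mbufs[] array above doubles as the TX free list:
		 * each free entry stores the index of the next free slot,
		 * with 0 terminating the chain.  Slot 0 is the list head
		 * and is never handed out; see add_id_to_freelist() and
		 * get_id_from_freelist().
		 */
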
		/* Start resource allocation. */

		if (gnttab_alloc_grant_references(NET_TX_RING_SIZE,
		    &txq->gref_head) != 0) {
			device_printf(dev, "failed to allocate tx grant refs\n");
			error = ENOMEM;
			goto fail;
		}

		txs = (netif_tx_sring_t *)malloc(PAGE_SIZE, M_DEVBUF,
		    M_WAITOK|M_ZERO);
		SHARED_RING_INIT(txs);
		FRONT_RING_INIT(&txq->ring, txs, PAGE_SIZE);

		error = xenbus_grant_ring(dev, virt_to_mfn(txs),
		    &txq->ring_ref);
		if (error != 0) {
			device_printf(dev, "failed to grant tx ring\n");
			goto fail_grant_ring;
		}

		txq->br = buf_ring_alloc(NET_TX_RING_SIZE, M_DEVBUF,
		    M_WAITOK, &txq->lock);
		TASK_INIT(&txq->defrtask, 0, xn_txq_tq_deferred, txq);

		txq->tq = taskqueue_create(txq->name, M_WAITOK,
		    taskqueue_thread_enqueue, &txq->tq);

		error = taskqueue_start_threads(&txq->tq, 1, PI_NET,
		    "%s txq %d", device_get_nameunit(dev), txq->id);
		if (error != 0) {
			device_printf(dev, "failed to start tx taskq %d\n",
			    txq->id);
			goto fail_start_thread;
		}

		error = xen_intr_alloc_and_bind_local_port(dev,
		    xenbus_get_otherend_id(dev), /* filter */ NULL, xn_intr,
		    &info->txq[q], INTR_TYPE_NET | INTR_MPSAFE | INTR_ENTROPY,
		    &txq->xen_intr_handle);

		if (error != 0) {
			device_printf(dev, "xen_intr_alloc_and_bind_local_port failed\n");
			goto fail_bind_port;
		}
	}

	return (0);

fail_bind_port:
	taskqueue_drain_all(txq->tq);
fail_start_thread:
	buf_ring_free(txq->br, M_DEVBUF);
	taskqueue_free(txq->tq);
	gnttab_end_foreign_access(txq->ring_ref, NULL);
fail_grant_ring:
	gnttab_free_grant_references(txq->gref_head);
	free(txq->ring.sring, M_DEVBUF);
fail:
	for (; q >= 0; q--) {
		disconnect_txq(&info->txq[q]);
		destroy_txq(&info->txq[q]);
	}

	free(info->txq, M_DEVBUF);
	return (error);
}

static int
setup_device(device_t dev, struct netfront_info *info,
    unsigned long num_queues)
{
	int error;
	int q;

	if (info->txq)
		destroy_txqs(info);

	if (info->rxq)
		destroy_rxqs(info);

	info->num_queues = 0;

	error = setup_rxqs(dev, info, num_queues);
	if (error != 0)
		goto out;
	error = setup_txqs(dev, info, num_queues);
	if (error != 0)
		goto out;

	info->num_queues = num_queues;

	/* No split event channel at the moment. */
	for (q = 0; q < num_queues; q++)
		info->rxq[q].xen_intr_handle = info->txq[q].xen_intr_handle;

	return (0);

out:
	KASSERT(error != 0, ("Error path taken without providing an error code"));
	return (error);
}

#ifdef INET
/**
 * If this interface has an IPv4 address, send an ARP for it.  This
 * helps to get the network going again after migrating hosts.
 */
static void
netfront_send_fake_arp(device_t dev, struct netfront_info *info)
{
	struct ifnet *ifp;
	struct ifaddr *ifa;

	ifp = info->xn_ifp;
	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
		if (ifa->ifa_addr->sa_family == AF_INET) {
			arp_ifinit(ifp, ifa);
		}
	}
}
#endif

/**
 * Callback received when the backend's state changes.
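 *
 * The transitions we act on are InitWait (backend is ready: connect and
 * advance to Connected), Closing (acknowledge by moving to Closed) and
 * Closed (after a locally requested reset, restart the handshake by going
 * back to Initialising).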
 */
static void
netfront_backend_changed(device_t dev, XenbusState newstate)
{
	struct netfront_info *sc = device_get_softc(dev);

	DPRINTK("newstate=%d\n", newstate);

	switch (newstate) {
	case XenbusStateInitialising:
	case XenbusStateInitialised:
	case XenbusStateUnknown:
	case XenbusStateReconfigured:
	case XenbusStateReconfiguring:
		break;
	case XenbusStateInitWait:
		if (xenbus_get_state(dev) != XenbusStateInitialising)
			break;
		if (xn_connect(sc) != 0)
			break;
		xenbus_set_state(dev, XenbusStateConnected);
		break;
	case XenbusStateClosing:
		xenbus_set_state(dev, XenbusStateClosed);
		break;
	case XenbusStateClosed:
		if (sc->xn_reset) {
			netif_disconnect_backend(sc);
			xenbus_set_state(dev, XenbusStateInitialising);
			sc->xn_reset = false;
		}
		break;
	case XenbusStateConnected:
#ifdef INET
		netfront_send_fake_arp(dev, sc);
#endif
		break;
	}
}

/**
 * \brief Verify that there is sufficient space in the Tx ring
 *        buffer for a maximally sized request to be enqueued.
 *
 * A transmit request requires a transmit descriptor for each packet
 * fragment, plus up to 2 entries for "options" (e.g. TSO).
 */
static inline int
xn_tx_slot_available(struct netfront_txq *txq)
{

	return (RING_FREE_REQUESTS(&txq->ring) > (MAX_TX_REQ_FRAGS + 2));
}

static void
xn_release_tx_bufs(struct netfront_txq *txq)
{
	int i;

	for (i = 1; i <= NET_TX_RING_SIZE; i++) {
		struct mbuf *m;

		m = txq->mbufs[i];

		/*
		 * We assume that no kernel addresses are
		 * less than NET_TX_RING_SIZE.  Any entry
		 * in the table that is below this number
		 * must be an index from free-list tracking.
		 */
		if (((uintptr_t)m) <= NET_TX_RING_SIZE)
			continue;
		gnttab_end_foreign_access_ref(txq->grant_ref[i]);
		gnttab_release_grant_reference(&txq->gref_head,
		    txq->grant_ref[i]);
		txq->grant_ref[i] = GRANT_REF_INVALID;
		add_id_to_freelist(txq->mbufs, i);
		txq->mbufs_cnt--;
		if (txq->mbufs_cnt < 0) {
			panic("%s: tx_chain_cnt must be >= 0", __func__);
		}
		m_free(m);
	}
}

static struct mbuf *
xn_alloc_one_rx_buffer(struct netfront_rxq *rxq)
{
	struct mbuf *m;

	m = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, MJUMPAGESIZE);
	if (m == NULL)
		return (NULL);
	m->m_len = m->m_pkthdr.len = MJUMPAGESIZE;

	return (m);
}

static void
xn_alloc_rx_buffers(struct netfront_rxq *rxq)
{
	RING_IDX req_prod;
	int notify;

	XN_RX_LOCK_ASSERT(rxq);

	if (__predict_false(rxq->info->carrier == 0))
		return;

	for (req_prod = rxq->ring.req_prod_pvt;
	     req_prod - rxq->ring.rsp_cons < NET_RX_RING_SIZE;
	     req_prod++) {
		struct mbuf *m;
		unsigned short id;
		grant_ref_t ref;
		struct netif_rx_request *req;
		unsigned long pfn;

		m = xn_alloc_one_rx_buffer(rxq);
		if (m == NULL)
			break;

		id = xn_rxidx(req_prod);

		KASSERT(rxq->mbufs[id] == NULL, ("non-NULL xn_rx_chain"));
		rxq->mbufs[id] = m;

		ref = gnttab_claim_grant_reference(&rxq->gref_head);
		KASSERT(ref != GNTTAB_LIST_END,
		    ("reserved grant references exhausted"));
		rxq->grant_ref[id] = ref;

		pfn = atop(vtophys(mtod(m, vm_offset_t)));
		req = RING_GET_REQUEST(&rxq->ring, req_prod);

		gnttab_grant_foreign_access_ref(ref,
		    xenbus_get_otherend_id(rxq->info->xbdev), pfn, 0);
		req->id = id;
		req->gref = ref;
	}

	rxq->ring.req_prod_pvt = req_prod;

	/* Not enough requests? Try again later.
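	 * The rx_refill callout below retries the allocation after
	 * roughly 100 ms (hz / 10 ticks).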
	 */
	if (req_prod - rxq->ring.rsp_cons < NET_RX_SLOTS_MIN) {
		callout_reset_curcpu(&rxq->rx_refill, hz/10,
		    xn_alloc_rx_buffers_callout, rxq);
		return;
	}

	wmb();		/* barrier so backend sees requests */

	RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&rxq->ring, notify);
	if (notify)
		xen_intr_signal(rxq->xen_intr_handle);
}

static void
xn_alloc_rx_buffers_callout(void *arg)
{
	struct netfront_rxq *rxq;

	rxq = (struct netfront_rxq *)arg;
	XN_RX_LOCK(rxq);
	xn_alloc_rx_buffers(rxq);
	XN_RX_UNLOCK(rxq);
}

static void
xn_release_rx_bufs(struct netfront_rxq *rxq)
{
	int i, ref;
	struct mbuf *m;

	for (i = 0; i < NET_RX_RING_SIZE; i++) {
		m = rxq->mbufs[i];

		if (m == NULL)
			continue;

		ref = rxq->grant_ref[i];
		if (ref == GRANT_REF_INVALID)
			continue;

		gnttab_end_foreign_access_ref(ref);
		gnttab_release_grant_reference(&rxq->gref_head, ref);
		rxq->mbufs[i] = NULL;
		rxq->grant_ref[i] = GRANT_REF_INVALID;
		m_freem(m);
	}
}

static void
xn_rxeof(struct netfront_rxq *rxq)
{
	struct ifnet *ifp;
	struct netfront_info *np = rxq->info;
#if (defined(INET) || defined(INET6))
	struct lro_ctrl *lro = &rxq->lro;
#endif
	struct netfront_rx_info rinfo;
	struct netif_rx_response *rx = &rinfo.rx;
	struct netif_extra_info *extras = rinfo.extras;
	RING_IDX i, rp;
	struct mbuf *m;
	struct mbufq mbufq_rxq, mbufq_errq;
	int err, work_to_do;

	do {
		XN_RX_LOCK_ASSERT(rxq);
		if (!netfront_carrier_ok(np))
			return;

		/* XXX: there should be some sane limit. */
		mbufq_init(&mbufq_errq, INT_MAX);
		mbufq_init(&mbufq_rxq, INT_MAX);

		ifp = np->xn_ifp;

		rp = rxq->ring.sring->rsp_prod;
		rmb();	/* Ensure we see queued responses up to 'rp'. */

		i = rxq->ring.rsp_cons;
		while (i != rp) {
			memcpy(rx, RING_GET_RESPONSE(&rxq->ring, i), sizeof(*rx));
			memset(extras, 0, sizeof(rinfo.extras));

			m = NULL;
			err = xn_get_responses(rxq, &rinfo, rp, &i, &m);

			if (__predict_false(err)) {
				if (m)
					(void)mbufq_enqueue(&mbufq_errq, m);
				rxq->stats.rx_errors++;
				continue;
			}

			m->m_pkthdr.rcvif = ifp;
			if (rx->flags & NETRXF_data_validated) {
				/*
				 * Tell the stack the checksums are okay.
				 * XXX this isn't necessarily the case - need
				 * to add a check.
				 */
				m->m_pkthdr.csum_flags |=
				    (CSUM_IP_CHECKED | CSUM_IP_VALID | CSUM_DATA_VALID
				    | CSUM_PSEUDO_HDR);
				m->m_pkthdr.csum_data = 0xffff;
			}
			if ((rx->flags & NETRXF_extra_info) != 0 &&
			    (extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].type ==
			    XEN_NETIF_EXTRA_TYPE_GSO)) {
				m->m_pkthdr.tso_segsz =
				    extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].u.gso.size;
				m->m_pkthdr.csum_flags |= CSUM_TSO;
			}

			rxq->stats.rx_packets++;
			rxq->stats.rx_bytes += m->m_pkthdr.len;

			(void)mbufq_enqueue(&mbufq_rxq, m);
			rxq->ring.rsp_cons = i;
		}

		mbufq_drain(&mbufq_errq);

		/*
		 * Process all the mbufs after the remapping is complete.
		 * Break the mbuf chain first though.
		 */
		while ((m = mbufq_dequeue(&mbufq_rxq)) != NULL) {
			if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);

			/* XXX: Do we really need to drop the rx lock?
			 */
			XN_RX_UNLOCK(rxq);
#if (defined(INET) || defined(INET6))
			/* Use LRO if possible. */
			if ((ifp->if_capenable & IFCAP_LRO) == 0 ||
			    lro->lro_cnt == 0 || tcp_lro_rx(lro, m, 0)) {
				/*
				 * If LRO fails, pass up to the stack
				 * directly.
				 */
				(*ifp->if_input)(ifp, m);
			}
#else
			(*ifp->if_input)(ifp, m);
#endif

			XN_RX_LOCK(rxq);
		}

		rxq->ring.rsp_cons = i;

#if (defined(INET) || defined(INET6))
		/*
		 * Flush any outstanding LRO work.
		 */
		tcp_lro_flush_all(lro);
#endif

		xn_alloc_rx_buffers(rxq);

		RING_FINAL_CHECK_FOR_RESPONSES(&rxq->ring, work_to_do);
	} while (work_to_do);
}

static void
xn_txeof(struct netfront_txq *txq)
{
	RING_IDX i, prod;
	unsigned short id;
	struct ifnet *ifp;
	netif_tx_response_t *txr;
	struct mbuf *m;
	struct netfront_info *np = txq->info;

	XN_TX_LOCK_ASSERT(txq);

	if (!netfront_carrier_ok(np))
		return;

	ifp = np->xn_ifp;

	do {
		prod = txq->ring.sring->rsp_prod;
		rmb();	/* Ensure we see responses up to 'prod'. */

		for (i = txq->ring.rsp_cons; i != prod; i++) {
			txr = RING_GET_RESPONSE(&txq->ring, i);
			if (txr->status == NETIF_RSP_NULL)
				continue;

			if (txr->status != NETIF_RSP_OKAY) {
				printf("%s: WARNING: response is %d!\n",
				    __func__, txr->status);
			}
			id = txr->id;
			m = txq->mbufs[id];
			KASSERT(m != NULL, ("mbuf not found in chain"));
			KASSERT((uintptr_t)m > NET_TX_RING_SIZE,
			    ("mbuf already on the free list, but we're "
			    "trying to free it again!"));
			M_ASSERTVALID(m);

			/*
			 * Increment packet count if this is the last
			 * mbuf of the chain.
			 */
			if (!m->m_next)
				if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
			if (__predict_false(gnttab_query_foreign_access(
			    txq->grant_ref[id]) != 0)) {
				panic("%s: grant id %u still in use by the "
				    "backend", __func__, id);
			}
			gnttab_end_foreign_access_ref(txq->grant_ref[id]);
			gnttab_release_grant_reference(
			    &txq->gref_head, txq->grant_ref[id]);
			txq->grant_ref[id] = GRANT_REF_INVALID;

			txq->mbufs[id] = NULL;
			add_id_to_freelist(txq->mbufs, id);
			txq->mbufs_cnt--;
			m_free(m);
			/*
			 * Only mark the txq active if we've freed up at
			 * least one slot to try.
			 */
			ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
		}
		txq->ring.rsp_cons = prod;

		/*
		 * Set a new event, then check for race with update of
		 * tx_cons.  Note that it is essential to schedule a
		 * callback, no matter how few buffers are pending.  Even if
		 * there is space in the transmit ring, higher layers may
		 * be blocked because too much data is outstanding: in such
		 * cases notification from Xen is likely to be the only kick
		 * that we'll get.
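		 *
		 * The threshold below is set roughly halfway between the
		 * responses we have just consumed and the requests still
		 * outstanding, so the backend only notifies us again once
		 * about half of the pending requests have completed.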
		 */
		txq->ring.sring->rsp_event =
		    prod + ((txq->ring.sring->req_prod - prod) >> 1) + 1;

		mb();
	} while (prod != txq->ring.sring->rsp_prod);

	if (txq->full &&
	    ((txq->ring.sring->req_prod - prod) < NET_TX_RING_SIZE)) {
		txq->full = false;
		xn_txq_start(txq);
	}
}

static void
xn_intr(void *xsc)
{
	struct netfront_txq *txq = xsc;
	struct netfront_info *np = txq->info;
	struct netfront_rxq *rxq = &np->rxq[txq->id];

	/* kick both tx and rx */
	xn_rxq_intr(rxq);
	xn_txq_intr(txq);
}

static void
xn_move_rx_slot(struct netfront_rxq *rxq, struct mbuf *m,
    grant_ref_t ref)
{
	int new = xn_rxidx(rxq->ring.req_prod_pvt);

	KASSERT(rxq->mbufs[new] == NULL, ("mbufs != NULL"));
	rxq->mbufs[new] = m;
	rxq->grant_ref[new] = ref;
	RING_GET_REQUEST(&rxq->ring, rxq->ring.req_prod_pvt)->id = new;
	RING_GET_REQUEST(&rxq->ring, rxq->ring.req_prod_pvt)->gref = ref;
	rxq->ring.req_prod_pvt++;
}

static int
xn_get_extras(struct netfront_rxq *rxq,
    struct netif_extra_info *extras, RING_IDX rp, RING_IDX *cons)
{
	struct netif_extra_info *extra;

	int err = 0;

	do {
		struct mbuf *m;
		grant_ref_t ref;

		if (__predict_false(*cons + 1 == rp)) {
			err = EINVAL;
			break;
		}

		extra = (struct netif_extra_info *)
		    RING_GET_RESPONSE(&rxq->ring, ++(*cons));

		if (__predict_false(!extra->type ||
		    extra->type >= XEN_NETIF_EXTRA_TYPE_MAX)) {
			err = EINVAL;
		} else {
			memcpy(&extras[extra->type - 1], extra, sizeof(*extra));
		}

		m = xn_get_rx_mbuf(rxq, *cons);
		ref = xn_get_rx_ref(rxq, *cons);
		xn_move_rx_slot(rxq, m, ref);
	} while (extra->flags & XEN_NETIF_EXTRA_FLAG_MORE);

	return (err);
}

static int
xn_get_responses(struct netfront_rxq *rxq,
    struct netfront_rx_info *rinfo, RING_IDX rp, RING_IDX *cons,
    struct mbuf **list)
{
	struct netif_rx_response *rx = &rinfo->rx;
	struct netif_extra_info *extras = rinfo->extras;
	struct mbuf *m, *m0, *m_prev;
	grant_ref_t ref = xn_get_rx_ref(rxq, *cons);
	RING_IDX ref_cons = *cons;
	int frags = 1;
	int err = 0;
	u_long ret;

	m0 = m = m_prev = xn_get_rx_mbuf(rxq, *cons);

	if (rx->flags & NETRXF_extra_info) {
		err = xn_get_extras(rxq, extras, rp, cons);
	}

	if (m0 != NULL) {
		m0->m_pkthdr.len = 0;
		m0->m_next = NULL;
	}

	for (;;) {
#if 0
		DPRINTK("rx->status=%hd rx->offset=%hu frags=%u\n",
			rx->status, rx->offset, frags);
#endif
		if (__predict_false(rx->status < 0 ||
		    rx->offset + rx->status > PAGE_SIZE)) {

			xn_move_rx_slot(rxq, m, ref);
			if (m0 == m)
				m0 = NULL;
			m = NULL;
			err = EINVAL;
			goto next_skip_queue;
		}

		/*
		 * This definitely indicates a bug, either in this driver or in
		 * the backend driver.  In the future this should flag the bad
		 * situation to the system controller to reboot the backend.
		 */
		if (ref == GRANT_REF_INVALID) {
			printf("%s: Bad rx response id %d.\n", __func__, rx->id);
			err = EINVAL;
			goto next;
		}

		ret = gnttab_end_foreign_access_ref(ref);
		KASSERT(ret, ("Unable to end access to grant references"));

		gnttab_release_grant_reference(&rxq->gref_head, ref);

next:
		if (m == NULL)
			break;

		m->m_len = rx->status;
		m->m_data += rx->offset;
		m0->m_pkthdr.len += rx->status;

next_skip_queue:
		if (!(rx->flags & NETRXF_more_data))
			break;

		if (*cons + frags == rp) {
			if (net_ratelimit())
				WPRINTK("Need more frags\n");
			err = ENOENT;
			printf("%s: cons %u frags %u rp %u, not enough frags\n",
			    __func__, *cons, frags, rp);
			break;
		}
		/*
		 * Note that m can be NULL, if rx->status < 0 or if
		 * rx->offset + rx->status > PAGE_SIZE above.
		 */
		m_prev = m;

		rx = RING_GET_RESPONSE(&rxq->ring, *cons + frags);
		m = xn_get_rx_mbuf(rxq, *cons + frags);

		/*
		 * m_prev == NULL can happen if rx->status < 0 or if
		 * rx->offset + rx->status > PAGE_SIZE above.
		 */
		if (m_prev != NULL)
			m_prev->m_next = m;

		/*
		 * m0 can be NULL if rx->status < 0 or if rx->offset +
		 * rx->status > PAGE_SIZE above.
		 */
		if (m0 == NULL)
			m0 = m;
		m->m_next = NULL;
		ref = xn_get_rx_ref(rxq, *cons + frags);
		ref_cons = *cons + frags;
		frags++;
	}
	*list = m0;
	*cons += frags;

	return (err);
}

/**
 * \brief Count the number of fragments in an mbuf chain.
 *
 * Surprisingly, there isn't an M* macro for this.
 */
static inline int
xn_count_frags(struct mbuf *m)
{
	int nfrags;

	for (nfrags = 0; m != NULL; m = m->m_next)
		nfrags++;

	return (nfrags);
}

/**
 * Given an mbuf chain, make sure we have enough room and then push
 * it onto the transmit ring.
 */
static int
xn_assemble_tx_request(struct netfront_txq *txq, struct mbuf *m_head)
{
	struct mbuf *m;
	struct netfront_info *np = txq->info;
	struct ifnet *ifp = np->xn_ifp;
	u_int nfrags;
	int otherend_id;

	/*
	 * Defragment the mbuf if necessary.
	 *
	 * Check to see whether this request is longer than netback
	 * can handle, and try to defrag it.  It is a bit lame, but
	 * the netback driver in Linux can't deal with nfrags >
	 * MAX_TX_REQ_FRAGS, which is a quirk of the Linux network
	 * stack.
	 */
	nfrags = xn_count_frags(m_head);
	if (nfrags > np->maxfrags) {
		m = m_defrag(m_head, M_NOWAIT);
		if (!m) {
			/*
			 * Defrag failed, so free the mbuf and
			 * therefore drop the packet.
			 */
			m_freem(m_head);
			return (EMSGSIZE);
		}
		m_head = m;
	}

	/* Determine how many fragments now exist. */
	nfrags = xn_count_frags(m_head);

	/*
	 * Check to see whether the defragmented packet has too many
	 * segments for the Linux netback driver.
	 *
	 * The FreeBSD TCP stack, with TSO enabled, can produce a chain
	 * of mbufs longer than Linux can handle.  Make sure we don't
	 * pass a too-long chain over to the other side by dropping the
	 * packet.  It doesn't look like there is currently a way to
	 * tell the TCP stack to generate a shorter chain of packets.
	 */
	if (nfrags > MAX_TX_REQ_FRAGS) {
#ifdef DEBUG
		printf("%s: nfrags %d > MAX_TX_REQ_FRAGS %d, netback "
		    "won't be able to handle it, dropping\n",
		    __func__, nfrags, MAX_TX_REQ_FRAGS);
#endif
		m_freem(m_head);
		return (EMSGSIZE);
	}

	/*
	 * This check should be redundant.  We've already verified that we
	 * have enough slots in the ring to handle a packet of maximum
	 * size, and that our packet is less than the maximum size.  Keep
	 * it in here as an assert for now just to make certain that
	 * chain_cnt is accurate.
	 */
	KASSERT((txq->mbufs_cnt + nfrags) <= NET_TX_RING_SIZE,
	    ("%s: chain_cnt (%d) + nfrags (%d) > NET_TX_RING_SIZE "
	    "(%d)!", __func__, (int) txq->mbufs_cnt,
	    (int) nfrags, (int) NET_TX_RING_SIZE));

	/*
	 * Start packing the mbufs in this chain into
	 * the fragment pointers.  Stop when we run out
	 * of fragments or hit the end of the mbuf chain.
	 */
	m = m_head;
	otherend_id = xenbus_get_otherend_id(np->xbdev);
	for (m = m_head; m; m = m->m_next) {
		netif_tx_request_t *tx;
		uintptr_t id;
		grant_ref_t ref;
		u_long mfn; /* XXX Wrong type? */

		tx = RING_GET_REQUEST(&txq->ring, txq->ring.req_prod_pvt);
		id = get_id_from_freelist(txq->mbufs);
		if (id == 0)
			panic("%s: was allocated the freelist head!\n",
			    __func__);
		txq->mbufs_cnt++;
		if (txq->mbufs_cnt > NET_TX_RING_SIZE)
			panic("%s: tx_chain_cnt must be <= NET_TX_RING_SIZE\n",
			    __func__);
		txq->mbufs[id] = m;
		tx->id = id;
		ref = gnttab_claim_grant_reference(&txq->gref_head);
		KASSERT((short)ref >= 0, ("Negative ref"));
		mfn = virt_to_mfn(mtod(m, vm_offset_t));
		gnttab_grant_foreign_access_ref(ref, otherend_id,
		    mfn, GNTMAP_readonly);
		tx->gref = txq->grant_ref[id] = ref;
		tx->offset = mtod(m, vm_offset_t) & (PAGE_SIZE - 1);
		tx->flags = 0;
		if (m == m_head) {
			/*
			 * The first fragment has the entire packet
			 * size, subsequent fragments have just the
			 * fragment size.  The backend works out the
			 * true size of the first fragment by
			 * subtracting the sizes of the other
			 * fragments.
			 */
			tx->size = m->m_pkthdr.len;

			/*
			 * The first fragment contains the checksum flags
			 * and is optionally followed by extra data for
			 * TSO etc.
			 *
			 * CSUM_TSO requires checksum offloading.
			 * Some versions of FreeBSD fail to
			 * set CSUM_TCP in the CSUM_TSO case,
			 * so we have to test for CSUM_TSO
			 * explicitly.
			 */
			if (m->m_pkthdr.csum_flags
			    & (CSUM_DELAY_DATA | CSUM_TSO)) {
				tx->flags |= (NETTXF_csum_blank
				    | NETTXF_data_validated);
			}
			if (m->m_pkthdr.csum_flags & CSUM_TSO) {
				struct netif_extra_info *gso =
				    (struct netif_extra_info *)
				    RING_GET_REQUEST(&txq->ring,
				    ++txq->ring.req_prod_pvt);

				tx->flags |= NETTXF_extra_info;

				gso->u.gso.size = m->m_pkthdr.tso_segsz;
				gso->u.gso.type =
				    XEN_NETIF_GSO_TYPE_TCPV4;
				gso->u.gso.pad = 0;
				gso->u.gso.features = 0;

				gso->type = XEN_NETIF_EXTRA_TYPE_GSO;
				gso->flags = 0;
			}
		} else {
			tx->size = m->m_len;
		}
		if (m->m_next)
			tx->flags |= NETTXF_more_data;

		txq->ring.req_prod_pvt++;
	}
	BPF_MTAP(ifp, m_head);

	xn_txeof(txq);

	txq->stats.tx_bytes += m_head->m_pkthdr.len;
	txq->stats.tx_packets++;

	return (0);
}

/* equivalent of network_open() in Linux */
static void
xn_ifinit_locked(struct netfront_info *np)
{
	struct ifnet *ifp;
	int i;
	struct netfront_rxq *rxq;

	XN_LOCK_ASSERT(np);

	ifp = np->xn_ifp;

	if (ifp->if_drv_flags & IFF_DRV_RUNNING || !netfront_carrier_ok(np))
		return;

	xn_stop(np);

	for (i = 0; i < np->num_queues; i++) {
		rxq = &np->rxq[i];
		XN_RX_LOCK(rxq);
		xn_alloc_rx_buffers(rxq);
		rxq->ring.sring->rsp_event = rxq->ring.rsp_cons + 1;
		if (RING_HAS_UNCONSUMED_RESPONSES(&rxq->ring))
			xn_rxeof(rxq);
		XN_RX_UNLOCK(rxq);
	}

	ifp->if_drv_flags |= IFF_DRV_RUNNING;
	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
	if_link_state_change(ifp, LINK_STATE_UP);
}

static void
xn_ifinit(void *xsc)
{
	struct netfront_info *sc = xsc;

	XN_LOCK(sc);
	xn_ifinit_locked(sc);
	XN_UNLOCK(sc);
}

static int
xn_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
	struct netfront_info *sc = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *) data;
	device_t dev;
#ifdef INET
	struct ifaddr *ifa = (struct ifaddr *)data;
#endif
	int mask, error = 0;

	dev = sc->xbdev;

	switch (cmd) {
	case SIOCSIFADDR:
#ifdef INET
		XN_LOCK(sc);
		if (ifa->ifa_addr->sa_family == AF_INET) {
			ifp->if_flags |= IFF_UP;
			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
				xn_ifinit_locked(sc);
			arp_ifinit(ifp, ifa);
			XN_UNLOCK(sc);
		} else {
			XN_UNLOCK(sc);
#endif
			error = ether_ioctl(ifp, cmd, data);
#ifdef INET
		}
#endif
		break;
	case SIOCSIFMTU:
		ifp->if_mtu = ifr->ifr_mtu;
		ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
		xn_ifinit(sc);
		break;
	case SIOCSIFFLAGS:
		XN_LOCK(sc);
		if (ifp->if_flags & IFF_UP) {
			/*
			 * If only the state of the PROMISC flag changed,
			 * then just use the 'set promisc mode' command
			 * instead of reinitializing the entire NIC.  Doing
			 * a full re-init means reloading the firmware and
			 * waiting for it to start up, which may take a
			 * second or two.
			 */
			xn_ifinit_locked(sc);
		} else {
			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
				xn_stop(sc);
			}
		}
		sc->xn_if_flags = ifp->if_flags;
		XN_UNLOCK(sc);
		break;
	case SIOCSIFCAP:
		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
		if (mask & IFCAP_TXCSUM) {
			if (IFCAP_TXCSUM & ifp->if_capenable) {
				ifp->if_capenable &= ~(IFCAP_TXCSUM|IFCAP_TSO4);
				ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP
				    | CSUM_IP | CSUM_TSO);
			} else {
				ifp->if_capenable |= IFCAP_TXCSUM;
				ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP
				    | CSUM_IP);
			}
		}
		if (mask & IFCAP_RXCSUM) {
			ifp->if_capenable ^= IFCAP_RXCSUM;
		}
		if (mask & IFCAP_TSO4) {
			if (IFCAP_TSO4 & ifp->if_capenable) {
				ifp->if_capenable &= ~IFCAP_TSO4;
				ifp->if_hwassist &= ~CSUM_TSO;
			} else if (IFCAP_TXCSUM & ifp->if_capenable) {
				ifp->if_capenable |= IFCAP_TSO4;
				ifp->if_hwassist |= CSUM_TSO;
			} else {
				IPRINTK("Xen requires tx checksum offload"
				    " be enabled to use TSO\n");
				error = EINVAL;
			}
		}
		if (mask & IFCAP_LRO) {
			ifp->if_capenable ^= IFCAP_LRO;
		}
		/*
		 * We must reset the interface so the backend picks up the
		 * new features.
		 */
		XN_LOCK(sc);
		netfront_carrier_off(sc);
		sc->xn_reset = true;
		/*
		 * NB: the pending packet queue is not flushed, since
		 * the interface should still support the old options.
		 */
		XN_UNLOCK(sc);
		/*
		 * Delete the xenstore nodes that export features.
		 *
		 * NB: There's a xenbus state called
		 * "XenbusStateReconfiguring", which is what we should set
		 * here.  Sadly none of the backends know how to handle it,
		 * and simply disconnect from the frontend, so we will just
		 * switch back to XenbusStateInitialising in order to force
		 * a reconnection.
		 */
		xs_rm(XST_NIL, xenbus_get_node(dev), "feature-gso-tcpv4");
		xs_rm(XST_NIL, xenbus_get_node(dev), "feature-no-csum-offload");
		xenbus_set_state(dev, XenbusStateClosing);
		break;
	case SIOCADDMULTI:
	case SIOCDELMULTI:
		break;
	case SIOCSIFMEDIA:
	case SIOCGIFMEDIA:
		error = ifmedia_ioctl(ifp, ifr, &sc->sc_media, cmd);
		break;
	default:
		error = ether_ioctl(ifp, cmd, data);
	}

	return (error);
}

static void
xn_stop(struct netfront_info *sc)
{
	struct ifnet *ifp;

	XN_LOCK_ASSERT(sc);

	ifp = sc->xn_ifp;

	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
	if_link_state_change(ifp, LINK_STATE_DOWN);
}

static void
xn_rebuild_rx_bufs(struct netfront_rxq *rxq)
{
	int requeue_idx, i;
	grant_ref_t ref;
	netif_rx_request_t *req;

	for (requeue_idx = 0, i = 0; i < NET_RX_RING_SIZE; i++) {
		struct mbuf *m;
		u_long pfn;

		if (rxq->mbufs[i] == NULL)
			continue;

		m = rxq->mbufs[requeue_idx] = xn_get_rx_mbuf(rxq, i);
		ref = rxq->grant_ref[requeue_idx] = xn_get_rx_ref(rxq, i);

		req = RING_GET_REQUEST(&rxq->ring, requeue_idx);
		pfn = vtophys(mtod(m, vm_offset_t)) >> PAGE_SHIFT;

		gnttab_grant_foreign_access_ref(ref,
		    xenbus_get_otherend_id(rxq->info->xbdev),
		    pfn, 0);

		req->gref = ref;
		req->id   = requeue_idx;

		requeue_idx++;
	}

	rxq->ring.req_prod_pvt = requeue_idx;
}

/* START of Xenolinux helper functions adapted to FreeBSD. */
int
xn_connect(struct netfront_info *np)
{
	int i, error;
	u_int feature_rx_copy;
	struct netfront_rxq *rxq;
	struct netfront_txq *txq;

	error = xs_scanf(XST_NIL, xenbus_get_otherend_path(np->xbdev),
	    "feature-rx-copy", NULL, "%u", &feature_rx_copy);
	if (error != 0)
		feature_rx_copy = 0;

	/* We only support rx copy. */
	if (!feature_rx_copy)
		return (EPROTONOSUPPORT);

	/* Recovery procedure: */
	error = talk_to_backend(np->xbdev, np);
	if (error != 0)
		return (error);

	/* Step 1: Reinitialise variables. */
	xn_query_features(np);
	xn_configure_features(np);

	/* Step 2: Release TX buffers. */
	for (i = 0; i < np->num_queues; i++) {
		txq = &np->txq[i];
		xn_release_tx_bufs(txq);
	}

	/* Step 3: Rebuild the RX buffer freelist and the RX ring itself. */
	for (i = 0; i < np->num_queues; i++) {
		rxq = &np->rxq[i];
		xn_rebuild_rx_bufs(rxq);
	}

	/*
	 * Step 4: All public and private state should now be sane.  Get
	 * ready to start sending and receiving packets and give the driver
	 * domain a kick because we've probably just requeued some
	 * packets.
	 */
	netfront_carrier_on(np);
	for (i = 0; i < np->num_queues; i++) {
		txq = &np->txq[i];
		rxq = &np->rxq[i];
		xen_intr_signal(txq->xen_intr_handle);
		XN_TX_LOCK(txq);
		xn_txeof(txq);
		XN_TX_UNLOCK(txq);
		XN_RX_LOCK(rxq);
		xn_alloc_rx_buffers(rxq);
		XN_RX_UNLOCK(rxq);
	}

	return (0);
}

static void
xn_query_features(struct netfront_info *np)
{
	int val;

	device_printf(np->xbdev, "backend features:");

	if (xs_scanf(XST_NIL, xenbus_get_otherend_path(np->xbdev),
	    "feature-sg", NULL, "%d", &val) != 0)
		val = 0;

	np->maxfrags = 1;
	if (val) {
		np->maxfrags = MAX_TX_REQ_FRAGS;
		printf(" feature-sg");
	}

	if (xs_scanf(XST_NIL, xenbus_get_otherend_path(np->xbdev),
	    "feature-gso-tcpv4", NULL, "%d", &val) != 0)
		val = 0;

	np->xn_ifp->if_capabilities &= ~(IFCAP_TSO4|IFCAP_LRO);
	if (val) {
		np->xn_ifp->if_capabilities |= IFCAP_TSO4|IFCAP_LRO;
		printf(" feature-gso-tcpv4");
	}

	/*
	 * HW CSUM offload is assumed to be available unless
	 * feature-no-csum-offload is set in xenstore.
	 */
	if (xs_scanf(XST_NIL, xenbus_get_otherend_path(np->xbdev),
	    "feature-no-csum-offload", NULL, "%d", &val) != 0)
		val = 0;

	np->xn_ifp->if_capabilities |= IFCAP_HWCSUM;
	if (val) {
		np->xn_ifp->if_capabilities &= ~(IFCAP_HWCSUM);
		printf(" feature-no-csum-offload");
	}

	printf("\n");
}

static int
xn_configure_features(struct netfront_info *np)
{
	int err, cap_enabled;
#if (defined(INET) || defined(INET6))
	int i;
#endif
	struct ifnet *ifp;

	ifp = np->xn_ifp;
	err = 0;

	if ((ifp->if_capenable & ifp->if_capabilities) == ifp->if_capenable) {
		/* Current options are available, no need to do anything. */
		return (0);
	}

	/* Try to preserve as many options as possible.
	 */
	cap_enabled = ifp->if_capenable;
	ifp->if_capenable = ifp->if_hwassist = 0;

#if (defined(INET) || defined(INET6))
	if ((cap_enabled & IFCAP_LRO) != 0)
		for (i = 0; i < np->num_queues; i++)
			tcp_lro_free(&np->rxq[i].lro);
	if (xn_enable_lro &&
	    (ifp->if_capabilities & cap_enabled & IFCAP_LRO) != 0) {
		ifp->if_capenable |= IFCAP_LRO;
		for (i = 0; i < np->num_queues; i++) {
			err = tcp_lro_init(&np->rxq[i].lro);
			if (err != 0) {
				device_printf(np->xbdev,
				    "LRO initialization failed\n");
				ifp->if_capenable &= ~IFCAP_LRO;
				break;
			}
			np->rxq[i].lro.ifp = ifp;
		}
	}
	if ((ifp->if_capabilities & cap_enabled & IFCAP_TSO4) != 0) {
		ifp->if_capenable |= IFCAP_TSO4;
		ifp->if_hwassist |= CSUM_TSO;
	}
#endif
	if ((ifp->if_capabilities & cap_enabled & IFCAP_TXCSUM) != 0) {
		ifp->if_capenable |= IFCAP_TXCSUM;
		ifp->if_hwassist |= CSUM_TCP|CSUM_UDP;
	}
	if ((ifp->if_capabilities & cap_enabled & IFCAP_RXCSUM) != 0)
		ifp->if_capenable |= IFCAP_RXCSUM;

	return (err);
}

static int
xn_txq_mq_start_locked(struct netfront_txq *txq, struct mbuf *m)
{
	struct netfront_info *np;
	struct ifnet *ifp;
	struct buf_ring *br;
	int error, notify;

	np = txq->info;
	br = txq->br;
	ifp = np->xn_ifp;
	error = 0;

	XN_TX_LOCK_ASSERT(txq);

	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
	    !netfront_carrier_ok(np)) {
		if (m != NULL)
			error = drbr_enqueue(ifp, br, m);
		return (error);
	}

	if (m != NULL) {
		error = drbr_enqueue(ifp, br, m);
		if (error != 0)
			return (error);
	}

	while ((m = drbr_peek(ifp, br)) != NULL) {
		if (!xn_tx_slot_available(txq)) {
			drbr_putback(ifp, br, m);
			break;
		}

		error = xn_assemble_tx_request(txq, m);
		/* xn_assemble_tx_request always consumes the mbuf. */
		if (error != 0) {
			drbr_advance(ifp, br);
			break;
		}

		RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&txq->ring, notify);
		if (notify)
			xen_intr_signal(txq->xen_intr_handle);

		drbr_advance(ifp, br);
	}

	if (RING_FULL(&txq->ring))
		txq->full = true;

	return (0);
}

static int
xn_txq_mq_start(struct ifnet *ifp, struct mbuf *m)
{
	struct netfront_info *np;
	struct netfront_txq *txq;
	int i, npairs, error;

	np = ifp->if_softc;
	npairs = np->num_queues;

	KASSERT(npairs != 0, ("called with 0 available queues"));

	/* check if flowid is set */
	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
		i = m->m_pkthdr.flowid % npairs;
	else
		i = curcpu % npairs;

	txq = &np->txq[i];

	if (XN_TX_TRYLOCK(txq) != 0) {
		error = xn_txq_mq_start_locked(txq, m);
		XN_TX_UNLOCK(txq);
	} else {
		error = drbr_enqueue(ifp, txq->br, m);
		taskqueue_enqueue(txq->tq, &txq->defrtask);
	}

	return (error);
}

static void
xn_qflush(struct ifnet *ifp)
{
	struct netfront_info *np;
	struct netfront_txq *txq;
	struct mbuf *m;
	int i;

	np = ifp->if_softc;

	for (i = 0; i < np->num_queues; i++) {
		txq = &np->txq[i];

		XN_TX_LOCK(txq);
		while ((m = buf_ring_dequeue_sc(txq->br)) != NULL)
			m_freem(m);
		XN_TX_UNLOCK(txq);
	}

	if_qflush(ifp);
}

/**
 * Create a network device.
 * @param dev  Newbus device representing this virtual NIC.
 */
int
create_netdev(device_t dev)
{
	struct netfront_info *np;
	int err;
	struct ifnet *ifp;

	np = device_get_softc(dev);

	np->xbdev = dev;

	mtx_init(&np->sc_lock, "xnsc", "netfront softc lock", MTX_DEF);

	ifmedia_init(&np->sc_media, 0, xn_ifmedia_upd, xn_ifmedia_sts);
	ifmedia_add(&np->sc_media, IFM_ETHER|IFM_MANUAL, 0, NULL);
	ifmedia_set(&np->sc_media, IFM_ETHER|IFM_MANUAL);

	err = xen_net_read_mac(dev, np->mac);
	if (err != 0)
		goto error;

	/* Set up ifnet structure. */
	ifp = np->xn_ifp = if_alloc(IFT_ETHER);
	ifp->if_softc = np;
	if_initname(ifp, "xn", device_get_unit(dev));
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
	ifp->if_ioctl = xn_ioctl;

	ifp->if_transmit = xn_txq_mq_start;
	ifp->if_qflush = xn_qflush;

	ifp->if_init = xn_ifinit;

	ifp->if_hwassist = XN_CSUM_FEATURES;
	/* Enable all supported features at device creation. */
	ifp->if_capenable = ifp->if_capabilities =
	    IFCAP_HWCSUM|IFCAP_TSO4|IFCAP_LRO;
	ifp->if_hw_tsomax = 65536 - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN);
	ifp->if_hw_tsomaxsegcount = MAX_TX_REQ_FRAGS;
	ifp->if_hw_tsomaxsegsize = PAGE_SIZE;

	ether_ifattach(ifp, np->mac);
	netfront_carrier_off(np);

	return (0);

error:
	KASSERT(err != 0, ("Error path with no error code specified"));
	return (err);
}

static int
netfront_detach(device_t dev)
{
	struct netfront_info *info = device_get_softc(dev);

	DPRINTK("%s\n", xenbus_get_node(dev));

	netif_free(info);

	return (0);
}

static void
netif_free(struct netfront_info *np)
{

	XN_LOCK(np);
	xn_stop(np);
	XN_UNLOCK(np);
	netif_disconnect_backend(np);
	ether_ifdetach(np->xn_ifp);
	free(np->rxq, M_DEVBUF);
	free(np->txq, M_DEVBUF);
	if_free(np->xn_ifp);
	np->xn_ifp = NULL;
	ifmedia_removeall(&np->sc_media);
}

static void
netif_disconnect_backend(struct netfront_info *np)
{
	u_int i;

	for (i = 0; i < np->num_queues; i++) {
		XN_RX_LOCK(&np->rxq[i]);
		XN_TX_LOCK(&np->txq[i]);
	}
	netfront_carrier_off(np);
	for (i = 0; i < np->num_queues; i++) {
		XN_RX_UNLOCK(&np->rxq[i]);
		XN_TX_UNLOCK(&np->txq[i]);
	}

	for (i = 0; i < np->num_queues; i++) {
		disconnect_rxq(&np->rxq[i]);
		disconnect_txq(&np->txq[i]);
	}
}

static int
xn_ifmedia_upd(struct ifnet *ifp)
{

	return (0);
}

static void
xn_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr)
{

	ifmr->ifm_status = IFM_AVALID|IFM_ACTIVE;
	ifmr->ifm_active = IFM_ETHER|IFM_MANUAL;
}

/* ** Driver registration ** */
static device_method_t netfront_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		netfront_probe),
	DEVMETHOD(device_attach,	netfront_attach),
	DEVMETHOD(device_detach,	netfront_detach),
	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
	DEVMETHOD(device_suspend,	netfront_suspend),
	DEVMETHOD(device_resume,	netfront_resume),

	/* Xenbus interface */
	DEVMETHOD(xenbus_otherend_changed, netfront_backend_changed),

	DEVMETHOD_END
};

static driver_t netfront_driver = {
	"xn",
	netfront_methods,
	sizeof(struct netfront_info),
};

devclass_t netfront_devclass;

DRIVER_MODULE(xe, xenbusb_front, netfront_driver, netfront_devclass, NULL,
    NULL);