1 /*- 2 * Copyright (c) 2004-2006 Kip Macy 3 * Copyright (c) 2015 Wei Liu <wei.liu2@citrix.com> 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28 #include <sys/cdefs.h> 29 __FBSDID("$FreeBSD$"); 30 31 #include "opt_inet.h" 32 #include "opt_inet6.h" 33 34 #include <sys/param.h> 35 #include <sys/sockio.h> 36 #include <sys/limits.h> 37 #include <sys/mbuf.h> 38 #include <sys/malloc.h> 39 #include <sys/module.h> 40 #include <sys/kernel.h> 41 #include <sys/socket.h> 42 #include <sys/sysctl.h> 43 #include <sys/taskqueue.h> 44 45 #include <net/if.h> 46 #include <net/if_var.h> 47 #include <net/if_arp.h> 48 #include <net/ethernet.h> 49 #include <net/if_media.h> 50 #include <net/bpf.h> 51 #include <net/if_types.h> 52 53 #include <netinet/in.h> 54 #include <netinet/ip.h> 55 #include <netinet/if_ether.h> 56 #include <netinet/tcp.h> 57 #include <netinet/tcp_lro.h> 58 59 #include <vm/vm.h> 60 #include <vm/pmap.h> 61 62 #include <sys/bus.h> 63 64 #include <xen/xen-os.h> 65 #include <xen/hypervisor.h> 66 #include <xen/xen_intr.h> 67 #include <xen/gnttab.h> 68 #include <xen/interface/memory.h> 69 #include <xen/interface/io/netif.h> 70 #include <xen/xenbus/xenbusvar.h> 71 72 #include "xenbus_if.h" 73 74 /* Features supported by all backends. TSO and LRO can be negotiated */ 75 #define XN_CSUM_FEATURES (CSUM_TCP | CSUM_UDP) 76 77 #define NET_TX_RING_SIZE __RING_SIZE((netif_tx_sring_t *)0, PAGE_SIZE) 78 #define NET_RX_RING_SIZE __RING_SIZE((netif_rx_sring_t *)0, PAGE_SIZE) 79 80 #define NET_RX_SLOTS_MIN (XEN_NETIF_NR_SLOTS_MIN + 1) 81 82 /* 83 * Should the driver do LRO on the RX end 84 * this can be toggled on the fly, but the 85 * interface must be reset (down/up) for it 86 * to take effect. 87 */ 88 static int xn_enable_lro = 1; 89 TUNABLE_INT("hw.xn.enable_lro", &xn_enable_lro); 90 91 /* 92 * Number of pairs of queues. 93 */ 94 static unsigned long xn_num_queues = 4; 95 TUNABLE_ULONG("hw.xn.num_queues", &xn_num_queues); 96 97 /** 98 * \brief The maximum allowed data fragments in a single transmit 99 * request. 100 * 101 * This limit is imposed by the backend driver. We assume here that 102 * we are dealing with a Linux driver domain and have set our limit 103 * to mirror the Linux MAX_SKB_FRAGS constant. 
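 *
 * Illustrative arithmetic (not part of the build): with the common 4 KiB
 * PAGE_SIZE the definition below evaluates to 65536 / 4096 + 2 = 18
 * fragments per transmit request.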
104 */ 105 #define MAX_TX_REQ_FRAGS (65536 / PAGE_SIZE + 2) 106 107 #define RX_COPY_THRESHOLD 256 108 109 #define net_ratelimit() 0 110 111 struct netfront_rxq; 112 struct netfront_txq; 113 struct netfront_info; 114 struct netfront_rx_info; 115 116 static void xn_txeof(struct netfront_txq *); 117 static void xn_rxeof(struct netfront_rxq *); 118 static void xn_alloc_rx_buffers(struct netfront_rxq *); 119 static void xn_alloc_rx_buffers_callout(void *arg); 120 121 static void xn_release_rx_bufs(struct netfront_rxq *); 122 static void xn_release_tx_bufs(struct netfront_txq *); 123 124 static void xn_rxq_intr(void *); 125 static void xn_txq_intr(void *); 126 static int xn_intr(void *); 127 static inline int xn_count_frags(struct mbuf *m); 128 static int xn_assemble_tx_request(struct netfront_txq *, struct mbuf *); 129 static int xn_ioctl(struct ifnet *, u_long, caddr_t); 130 static void xn_ifinit_locked(struct netfront_info *); 131 static void xn_ifinit(void *); 132 static void xn_stop(struct netfront_info *); 133 static void xn_query_features(struct netfront_info *np); 134 static int xn_configure_features(struct netfront_info *np); 135 static void netif_free(struct netfront_info *info); 136 static int netfront_detach(device_t dev); 137 138 static int xn_txq_mq_start_locked(struct netfront_txq *, struct mbuf *); 139 static int xn_txq_mq_start(struct ifnet *, struct mbuf *); 140 141 static int talk_to_backend(device_t dev, struct netfront_info *info); 142 static int create_netdev(device_t dev); 143 static void netif_disconnect_backend(struct netfront_info *info); 144 static int setup_device(device_t dev, struct netfront_info *info, 145 unsigned long); 146 static int xn_ifmedia_upd(struct ifnet *ifp); 147 static void xn_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr); 148 149 int xn_connect(struct netfront_info *); 150 151 static int xn_get_responses(struct netfront_rxq *, 152 struct netfront_rx_info *, RING_IDX, RING_IDX *, 153 struct mbuf **); 154 155 #define virt_to_mfn(x) (vtophys(x) >> PAGE_SHIFT) 156 157 #define INVALID_P2M_ENTRY (~0UL) 158 159 struct xn_rx_stats 160 { 161 u_long rx_packets; /* total packets received */ 162 u_long rx_bytes; /* total bytes received */ 163 u_long rx_errors; /* bad packets received */ 164 }; 165 166 struct xn_tx_stats 167 { 168 u_long tx_packets; /* total packets transmitted */ 169 u_long tx_bytes; /* total bytes transmitted */ 170 u_long tx_errors; /* packet transmit problems */ 171 }; 172 173 #define XN_QUEUE_NAME_LEN 8 /* xn{t,r}x_%u, allow for two digits */ 174 struct netfront_rxq { 175 struct netfront_info *info; 176 u_int id; 177 char name[XN_QUEUE_NAME_LEN]; 178 struct mtx lock; 179 180 int ring_ref; 181 netif_rx_front_ring_t ring; 182 xen_intr_handle_t xen_intr_handle; 183 184 grant_ref_t gref_head; 185 grant_ref_t grant_ref[NET_TX_RING_SIZE + 1]; 186 187 struct mbuf *mbufs[NET_RX_RING_SIZE + 1]; 188 189 struct lro_ctrl lro; 190 191 struct taskqueue *tq; 192 struct task intrtask; 193 194 struct callout rx_refill; 195 196 struct xn_rx_stats stats; 197 }; 198 199 struct netfront_txq { 200 struct netfront_info *info; 201 u_int id; 202 char name[XN_QUEUE_NAME_LEN]; 203 struct mtx lock; 204 205 int ring_ref; 206 netif_tx_front_ring_t ring; 207 xen_intr_handle_t xen_intr_handle; 208 209 grant_ref_t gref_head; 210 grant_ref_t grant_ref[NET_TX_RING_SIZE + 1]; 211 212 struct mbuf *mbufs[NET_TX_RING_SIZE + 1]; 213 int mbufs_cnt; 214 struct buf_ring *br; 215 216 struct taskqueue *tq; 217 struct task intrtask; 218 struct task defrtask; 219 220 bool full; 
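
	/*
	 * 'full' is set by xn_txq_mq_start_locked() when RING_FULL() is
	 * observed and cleared by xn_txeof() once responses free up ring
	 * space; 'defrtask' is the deferred-start task enqueued by
	 * xn_txq_mq_start() when the queue lock is contended.
	 */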

	struct xn_tx_stats stats;
};

struct netfront_info {
	struct ifnet *xn_ifp;

	struct mtx sc_lock;

	u_int num_queues;
	struct netfront_rxq *rxq;
	struct netfront_txq *txq;

	u_int carrier;
	u_int maxfrags;

	device_t xbdev;
	uint8_t mac[ETHER_ADDR_LEN];

	int xn_if_flags;

	struct ifmedia sc_media;

	bool xn_resume;
};

struct netfront_rx_info {
	struct netif_rx_response rx;
	struct netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1];
};

#define XN_RX_LOCK(_q)		mtx_lock(&(_q)->lock)
#define XN_RX_UNLOCK(_q)	mtx_unlock(&(_q)->lock)

#define XN_TX_LOCK(_q)		mtx_lock(&(_q)->lock)
#define XN_TX_TRYLOCK(_q)	mtx_trylock(&(_q)->lock)
#define XN_TX_UNLOCK(_q)	mtx_unlock(&(_q)->lock)

#define XN_LOCK(_sc)		mtx_lock(&(_sc)->sc_lock);
#define XN_UNLOCK(_sc)		mtx_unlock(&(_sc)->sc_lock);

#define XN_LOCK_ASSERT(_sc)	mtx_assert(&(_sc)->sc_lock, MA_OWNED);
#define XN_RX_LOCK_ASSERT(_q)	mtx_assert(&(_q)->lock, MA_OWNED);
#define XN_TX_LOCK_ASSERT(_q)	mtx_assert(&(_q)->lock, MA_OWNED);

#define netfront_carrier_on(netif)	((netif)->carrier = 1)
#define netfront_carrier_off(netif)	((netif)->carrier = 0)
#define netfront_carrier_ok(netif)	((netif)->carrier)

/*
 * Helpers for acquiring and releasing free slots in the per-queue mbufs[]
 * arrays, which double as freelists: entry 0 is the freelist head and each
 * free entry stores the index of the next free slot.
 */

static inline void
add_id_to_freelist(struct mbuf **list, uintptr_t id)
{

	KASSERT(id != 0,
	    ("%s: the head item (0) must always be free.", __func__));
	list[id] = list[0];
	list[0] = (struct mbuf *)id;
}

static inline unsigned short
get_id_from_freelist(struct mbuf **list)
{
	uintptr_t id;

	id = (uintptr_t)list[0];
	KASSERT(id != 0,
	    ("%s: the head item (0) must always remain free.", __func__));
	list[0] = list[id];
	return (id);
}

static inline int
xn_rxidx(RING_IDX idx)
{

	return idx & (NET_RX_RING_SIZE - 1);
}

static inline struct mbuf *
xn_get_rx_mbuf(struct netfront_rxq *rxq, RING_IDX ri)
{
	int i;
	struct mbuf *m;

	i = xn_rxidx(ri);
	m = rxq->mbufs[i];
	rxq->mbufs[i] = NULL;
	return (m);
}

static inline grant_ref_t
xn_get_rx_ref(struct netfront_rxq *rxq, RING_IDX ri)
{
	int i = xn_rxidx(ri);
	grant_ref_t ref = rxq->grant_ref[i];

	KASSERT(ref != GRANT_REF_INVALID, ("Invalid grant reference!\n"));
	rxq->grant_ref[i] = GRANT_REF_INVALID;
	return (ref);
}

#define IPRINTK(fmt, args...) \
	printf("[XEN] " fmt, ##args)
#ifdef INVARIANTS
#define WPRINTK(fmt, args...) \
	printf("[XEN] " fmt, ##args)
#else
#define WPRINTK(fmt, args...)
#endif
#ifdef DEBUG
#define DPRINTK(fmt, args...) \
	printf("[XEN] %s: " fmt, __func__, ##args)
#else
#define DPRINTK(fmt, args...)
#endif

/**
 * Read the 'mac' node at the given device's node in the store, and parse that
 * as colon-separated octets, placing the result in the given mac array.  mac
 * must be a preallocated array of ETHER_ADDR_LEN octets.
 * Return 0 on success, or errno on error.
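 * For illustration, the XenStore value is a colon-separated string such as
 * "00:16:3e:00:00:01" (a made-up example; 00:16:3e is the OUI commonly used
 * for Xen guests).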
 */
static int
xen_net_read_mac(device_t dev, uint8_t mac[])
{
	int error, i;
	char *s, *e, *macstr;
	const char *path;

	path = xenbus_get_node(dev);
	error = xs_read(XST_NIL, path, "mac", NULL, (void **) &macstr);
	if (error == ENOENT) {
		/*
		 * Deal with missing mac XenStore nodes on devices with
		 * HVM emulation (the 'ioemu' configuration attribute)
		 * enabled.
		 *
		 * The HVM emulator may execute in a stub device model
		 * domain which lacks the permission, only given to Dom0,
		 * to update the guest's XenStore tree.  For this reason,
		 * the HVM emulator doesn't even attempt to write the
		 * front-side mac node, even when operating in Dom0.
		 * However, there should always be a mac listed in the
		 * backend tree.  Fall back to this version if our query
		 * of the front side XenStore location doesn't find
		 * anything.
		 */
		path = xenbus_get_otherend_path(dev);
		error = xs_read(XST_NIL, path, "mac", NULL, (void **) &macstr);
	}
	if (error != 0) {
		xenbus_dev_fatal(dev, error, "parsing %s/mac", path);
		return (error);
	}

	s = macstr;
	for (i = 0; i < ETHER_ADDR_LEN; i++) {
		mac[i] = strtoul(s, &e, 16);
		if (s == e || (e[0] != ':' && e[0] != 0)) {
			free(macstr, M_XENBUS);
			return (ENOENT);
		}
		s = &e[1];
	}
	free(macstr, M_XENBUS);
	return (0);
}

/**
 * Entry point to this code when a new device is created.  The probe simply
 * matches XenBus devices of type "vif"; the rings and other structures are
 * allocated later, at attach time, and the device is switched to the
 * Connected state once the backend handshake in xn_connect() completes.
 */
static int
netfront_probe(device_t dev)
{

	if (xen_hvm_domain() && xen_disable_pv_nics != 0)
		return (ENXIO);

	if (!strcmp(xenbus_get_type(dev), "vif")) {
		device_set_desc(dev, "Virtual Network Interface");
		return (0);
	}

	return (ENXIO);
}

static int
netfront_attach(device_t dev)
{
	int err;

	err = create_netdev(dev);
	if (err != 0) {
		xenbus_dev_fatal(dev, err, "creating netdev");
		return (err);
	}

	SYSCTL_ADD_INT(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "enable_lro", CTLFLAG_RW,
	    &xn_enable_lro, 0, "Large Receive Offload");

	SYSCTL_ADD_ULONG(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "num_queues", CTLFLAG_RD,
	    &xn_num_queues, "Number of pairs of queues");

	return (0);
}

static int
netfront_suspend(device_t dev)
{
	struct netfront_info *np = device_get_softc(dev);
	u_int i;

	for (i = 0; i < np->num_queues; i++) {
		XN_RX_LOCK(&np->rxq[i]);
		XN_TX_LOCK(&np->txq[i]);
	}
	netfront_carrier_off(np);
	for (i = 0; i < np->num_queues; i++) {
		XN_RX_UNLOCK(&np->rxq[i]);
		XN_TX_UNLOCK(&np->txq[i]);
	}
	return (0);
}

/**
 * We are reconnecting to the backend, due to a suspend/resume, or a backend
 * driver restart.  We tear down our netif structure and recreate it, but
 * leave the device-layer structures intact so that this is transparent to the
 * rest of the kernel.
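 * Reconnection itself is driven by netfront_backend_changed(): once the
 * backend transitions through InitWait again, xn_connect() rebuilds the
 * rings and the device is moved back to the Connected state.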
459 */ 460 static int 461 netfront_resume(device_t dev) 462 { 463 struct netfront_info *info = device_get_softc(dev); 464 465 info->xn_resume = true; 466 netif_disconnect_backend(info); 467 return (0); 468 } 469 470 static int 471 write_queue_xenstore_keys(device_t dev, 472 struct netfront_rxq *rxq, 473 struct netfront_txq *txq, 474 struct xs_transaction *xst, bool hierarchy) 475 { 476 int err; 477 const char *message; 478 const char *node = xenbus_get_node(dev); 479 char *path; 480 size_t path_size; 481 482 KASSERT(rxq->id == txq->id, ("Mismatch between RX and TX queue ids")); 483 /* Split event channel support is not yet there. */ 484 KASSERT(rxq->xen_intr_handle == txq->xen_intr_handle, 485 ("Split event channels are not supported")); 486 487 if (hierarchy) { 488 path_size = strlen(node) + 10; 489 path = malloc(path_size, M_DEVBUF, M_WAITOK|M_ZERO); 490 snprintf(path, path_size, "%s/queue-%u", node, rxq->id); 491 } else { 492 path_size = strlen(node) + 1; 493 path = malloc(path_size, M_DEVBUF, M_WAITOK|M_ZERO); 494 snprintf(path, path_size, "%s", node); 495 } 496 497 err = xs_printf(*xst, path, "tx-ring-ref","%u", txq->ring_ref); 498 if (err != 0) { 499 message = "writing tx ring-ref"; 500 goto error; 501 } 502 err = xs_printf(*xst, path, "rx-ring-ref","%u", rxq->ring_ref); 503 if (err != 0) { 504 message = "writing rx ring-ref"; 505 goto error; 506 } 507 err = xs_printf(*xst, path, "event-channel", "%u", 508 xen_intr_port(rxq->xen_intr_handle)); 509 if (err != 0) { 510 message = "writing event-channel"; 511 goto error; 512 } 513 514 free(path, M_DEVBUF); 515 516 return (0); 517 518 error: 519 free(path, M_DEVBUF); 520 xenbus_dev_fatal(dev, err, "%s", message); 521 522 return (err); 523 } 524 525 /* Common code used when first setting up, and when resuming. 
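 *
 * For illustration, after a successful single-queue negotiation the
 * frontend's XenStore directory contains roughly (values are placeholders):
 *
 *   <node>/tx-ring-ref       = "<grant ref>"
 *   <node>/rx-ring-ref       = "<grant ref>"
 *   <node>/event-channel     = "<event channel port>"
 *   <node>/request-rx-copy   = "1"
 *   <node>/feature-rx-notify = "1"
 *   <node>/feature-sg        = "1"
 *   <node>/feature-gso-tcpv4 = "1"
 *
 * With multiple queues the ring references and event channel move under
 * <node>/queue-%u/ and multi-queue-num-queues is written as well; see
 * write_queue_xenstore_keys().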
*/ 526 static int 527 talk_to_backend(device_t dev, struct netfront_info *info) 528 { 529 const char *message; 530 struct xs_transaction xst; 531 const char *node = xenbus_get_node(dev); 532 int err; 533 unsigned long num_queues, max_queues = 0; 534 unsigned int i; 535 536 err = xen_net_read_mac(dev, info->mac); 537 if (err != 0) { 538 xenbus_dev_fatal(dev, err, "parsing %s/mac", node); 539 goto out; 540 } 541 542 err = xs_scanf(XST_NIL, xenbus_get_otherend_path(info->xbdev), 543 "multi-queue-max-queues", NULL, "%lu", &max_queues); 544 if (err != 0) 545 max_queues = 1; 546 num_queues = xn_num_queues; 547 if (num_queues > max_queues) 548 num_queues = max_queues; 549 550 err = setup_device(dev, info, num_queues); 551 if (err != 0) 552 goto out; 553 554 again: 555 err = xs_transaction_start(&xst); 556 if (err != 0) { 557 xenbus_dev_fatal(dev, err, "starting transaction"); 558 goto free; 559 } 560 561 if (info->num_queues == 1) { 562 err = write_queue_xenstore_keys(dev, &info->rxq[0], 563 &info->txq[0], &xst, false); 564 if (err != 0) 565 goto abort_transaction_no_def_error; 566 } else { 567 err = xs_printf(xst, node, "multi-queue-num-queues", 568 "%u", info->num_queues); 569 if (err != 0) { 570 message = "writing multi-queue-num-queues"; 571 goto abort_transaction; 572 } 573 574 for (i = 0; i < info->num_queues; i++) { 575 err = write_queue_xenstore_keys(dev, &info->rxq[i], 576 &info->txq[i], &xst, true); 577 if (err != 0) 578 goto abort_transaction_no_def_error; 579 } 580 } 581 582 err = xs_printf(xst, node, "request-rx-copy", "%u", 1); 583 if (err != 0) { 584 message = "writing request-rx-copy"; 585 goto abort_transaction; 586 } 587 err = xs_printf(xst, node, "feature-rx-notify", "%d", 1); 588 if (err != 0) { 589 message = "writing feature-rx-notify"; 590 goto abort_transaction; 591 } 592 err = xs_printf(xst, node, "feature-sg", "%d", 1); 593 if (err != 0) { 594 message = "writing feature-sg"; 595 goto abort_transaction; 596 } 597 err = xs_printf(xst, node, "feature-gso-tcpv4", "%d", 1); 598 if (err != 0) { 599 message = "writing feature-gso-tcpv4"; 600 goto abort_transaction; 601 } 602 603 err = xs_transaction_end(xst, 0); 604 if (err != 0) { 605 if (err == EAGAIN) 606 goto again; 607 xenbus_dev_fatal(dev, err, "completing transaction"); 608 goto free; 609 } 610 611 return 0; 612 613 abort_transaction: 614 xenbus_dev_fatal(dev, err, "%s", message); 615 abort_transaction_no_def_error: 616 xs_transaction_end(xst, 1); 617 free: 618 netif_free(info); 619 out: 620 return (err); 621 } 622 623 static void 624 xn_rxq_tq_intr(void *xrxq, int pending) 625 { 626 struct netfront_rxq *rxq = xrxq; 627 628 XN_RX_LOCK(rxq); 629 xn_rxeof(rxq); 630 XN_RX_UNLOCK(rxq); 631 } 632 633 static void 634 xn_txq_start(struct netfront_txq *txq) 635 { 636 struct netfront_info *np = txq->info; 637 struct ifnet *ifp = np->xn_ifp; 638 639 XN_TX_LOCK_ASSERT(txq); 640 if (!drbr_empty(ifp, txq->br)) 641 xn_txq_mq_start_locked(txq, NULL); 642 } 643 644 static void 645 xn_txq_tq_intr(void *xtxq, int pending) 646 { 647 struct netfront_txq *txq = xtxq; 648 649 XN_TX_LOCK(txq); 650 if (RING_HAS_UNCONSUMED_RESPONSES(&txq->ring)) 651 xn_txeof(txq); 652 xn_txq_start(txq); 653 XN_TX_UNLOCK(txq); 654 } 655 656 static void 657 xn_txq_tq_deferred(void *xtxq, int pending) 658 { 659 struct netfront_txq *txq = xtxq; 660 661 XN_TX_LOCK(txq); 662 xn_txq_start(txq); 663 XN_TX_UNLOCK(txq); 664 } 665 666 static void 667 disconnect_rxq(struct netfront_rxq *rxq) 668 { 669 670 xn_release_rx_bufs(rxq); 671 
gnttab_free_grant_references(rxq->gref_head); 672 gnttab_end_foreign_access_ref(rxq->ring_ref); 673 /* 674 * No split event channel support at the moment, handle will 675 * be unbound in tx. So no need to call xen_intr_unbind here, 676 * but we do want to reset the handler to 0. 677 */ 678 rxq->xen_intr_handle = 0; 679 } 680 681 static void 682 destroy_rxq(struct netfront_rxq *rxq) 683 { 684 685 callout_drain(&rxq->rx_refill); 686 free(rxq->ring.sring, M_DEVBUF); 687 taskqueue_drain_all(rxq->tq); 688 taskqueue_free(rxq->tq); 689 } 690 691 static void 692 destroy_rxqs(struct netfront_info *np) 693 { 694 int i; 695 696 for (i = 0; i < np->num_queues; i++) 697 destroy_rxq(&np->rxq[i]); 698 699 free(np->rxq, M_DEVBUF); 700 np->rxq = NULL; 701 } 702 703 static int 704 setup_rxqs(device_t dev, struct netfront_info *info, 705 unsigned long num_queues) 706 { 707 int q, i; 708 int error; 709 netif_rx_sring_t *rxs; 710 struct netfront_rxq *rxq; 711 712 info->rxq = malloc(sizeof(struct netfront_rxq) * num_queues, 713 M_DEVBUF, M_WAITOK|M_ZERO); 714 715 for (q = 0; q < num_queues; q++) { 716 rxq = &info->rxq[q]; 717 718 rxq->id = q; 719 rxq->info = info; 720 rxq->ring_ref = GRANT_REF_INVALID; 721 rxq->ring.sring = NULL; 722 snprintf(rxq->name, XN_QUEUE_NAME_LEN, "xnrx_%u", q); 723 mtx_init(&rxq->lock, rxq->name, "netfront receive lock", 724 MTX_DEF); 725 726 for (i = 0; i <= NET_RX_RING_SIZE; i++) { 727 rxq->mbufs[i] = NULL; 728 rxq->grant_ref[i] = GRANT_REF_INVALID; 729 } 730 731 /* Start resources allocation */ 732 733 if (gnttab_alloc_grant_references(NET_RX_RING_SIZE, 734 &rxq->gref_head) != 0) { 735 device_printf(dev, "allocating rx gref"); 736 error = ENOMEM; 737 goto fail; 738 } 739 740 rxs = (netif_rx_sring_t *)malloc(PAGE_SIZE, M_DEVBUF, 741 M_WAITOK|M_ZERO); 742 SHARED_RING_INIT(rxs); 743 FRONT_RING_INIT(&rxq->ring, rxs, PAGE_SIZE); 744 745 error = xenbus_grant_ring(dev, virt_to_mfn(rxs), 746 &rxq->ring_ref); 747 if (error != 0) { 748 device_printf(dev, "granting rx ring page"); 749 goto fail_grant_ring; 750 } 751 752 TASK_INIT(&rxq->intrtask, 0, xn_rxq_tq_intr, rxq); 753 rxq->tq = taskqueue_create_fast(rxq->name, M_WAITOK, 754 taskqueue_thread_enqueue, &rxq->tq); 755 756 callout_init(&rxq->rx_refill, 1); 757 758 error = taskqueue_start_threads(&rxq->tq, 1, PI_NET, 759 "%s rxq %d", device_get_nameunit(dev), rxq->id); 760 if (error != 0) { 761 device_printf(dev, "failed to start rx taskq %d\n", 762 rxq->id); 763 goto fail_start_thread; 764 } 765 } 766 767 return (0); 768 769 fail_start_thread: 770 gnttab_end_foreign_access_ref(rxq->ring_ref); 771 taskqueue_drain_all(rxq->tq); 772 taskqueue_free(rxq->tq); 773 fail_grant_ring: 774 gnttab_free_grant_references(rxq->gref_head); 775 free(rxq->ring.sring, M_DEVBUF); 776 fail: 777 for (; q >= 0; q--) { 778 disconnect_rxq(&info->rxq[q]); 779 destroy_rxq(&info->rxq[q]); 780 } 781 782 free(info->rxq, M_DEVBUF); 783 return (error); 784 } 785 786 static void 787 disconnect_txq(struct netfront_txq *txq) 788 { 789 790 xn_release_tx_bufs(txq); 791 gnttab_free_grant_references(txq->gref_head); 792 gnttab_end_foreign_access_ref(txq->ring_ref); 793 xen_intr_unbind(&txq->xen_intr_handle); 794 } 795 796 static void 797 destroy_txq(struct netfront_txq *txq) 798 { 799 800 free(txq->ring.sring, M_DEVBUF); 801 buf_ring_free(txq->br, M_DEVBUF); 802 taskqueue_drain_all(txq->tq); 803 taskqueue_free(txq->tq); 804 } 805 806 static void 807 destroy_txqs(struct netfront_info *np) 808 { 809 int i; 810 811 for (i = 0; i < np->num_queues; i++) 812 destroy_txq(&np->txq[i]); 
813 814 free(np->txq, M_DEVBUF); 815 np->txq = NULL; 816 } 817 818 static int 819 setup_txqs(device_t dev, struct netfront_info *info, 820 unsigned long num_queues) 821 { 822 int q, i; 823 int error; 824 netif_tx_sring_t *txs; 825 struct netfront_txq *txq; 826 827 info->txq = malloc(sizeof(struct netfront_txq) * num_queues, 828 M_DEVBUF, M_WAITOK|M_ZERO); 829 830 for (q = 0; q < num_queues; q++) { 831 txq = &info->txq[q]; 832 833 txq->id = q; 834 txq->info = info; 835 836 txq->ring_ref = GRANT_REF_INVALID; 837 txq->ring.sring = NULL; 838 839 snprintf(txq->name, XN_QUEUE_NAME_LEN, "xntx_%u", q); 840 841 mtx_init(&txq->lock, txq->name, "netfront transmit lock", 842 MTX_DEF); 843 844 for (i = 0; i <= NET_TX_RING_SIZE; i++) { 845 txq->mbufs[i] = (void *) ((u_long) i+1); 846 txq->grant_ref[i] = GRANT_REF_INVALID; 847 } 848 txq->mbufs[NET_TX_RING_SIZE] = (void *)0; 849 850 /* Start resources allocation. */ 851 852 if (gnttab_alloc_grant_references(NET_TX_RING_SIZE, 853 &txq->gref_head) != 0) { 854 device_printf(dev, "failed to allocate tx grant refs\n"); 855 error = ENOMEM; 856 goto fail; 857 } 858 859 txs = (netif_tx_sring_t *)malloc(PAGE_SIZE, M_DEVBUF, 860 M_WAITOK|M_ZERO); 861 SHARED_RING_INIT(txs); 862 FRONT_RING_INIT(&txq->ring, txs, PAGE_SIZE); 863 864 error = xenbus_grant_ring(dev, virt_to_mfn(txs), 865 &txq->ring_ref); 866 if (error != 0) { 867 device_printf(dev, "failed to grant tx ring\n"); 868 goto fail_grant_ring; 869 } 870 871 txq->br = buf_ring_alloc(NET_TX_RING_SIZE, M_DEVBUF, 872 M_WAITOK, &txq->lock); 873 TASK_INIT(&txq->defrtask, 0, xn_txq_tq_deferred, txq); 874 TASK_INIT(&txq->intrtask, 0, xn_txq_tq_intr, txq); 875 876 txq->tq = taskqueue_create_fast(txq->name, M_WAITOK, 877 taskqueue_thread_enqueue, &txq->tq); 878 879 error = taskqueue_start_threads(&txq->tq, 1, PI_NET, 880 "%s txq %d", device_get_nameunit(dev), txq->id); 881 if (error != 0) { 882 device_printf(dev, "failed to start tx taskq %d\n", 883 txq->id); 884 goto fail_start_thread; 885 } 886 887 error = xen_intr_alloc_and_bind_local_port(dev, 888 xenbus_get_otherend_id(dev), xn_intr, /* handler */ NULL, 889 &info->txq[q], 890 INTR_TYPE_NET | INTR_MPSAFE | INTR_ENTROPY, 891 &txq->xen_intr_handle); 892 893 if (error != 0) { 894 device_printf(dev, "xen_intr_alloc_and_bind_local_port failed\n"); 895 goto fail_bind_port; 896 } 897 } 898 899 return (0); 900 901 fail_bind_port: 902 taskqueue_drain_all(txq->tq); 903 fail_start_thread: 904 buf_ring_free(txq->br, M_DEVBUF); 905 taskqueue_free(txq->tq); 906 gnttab_end_foreign_access_ref(txq->ring_ref); 907 fail_grant_ring: 908 gnttab_free_grant_references(txq->gref_head); 909 free(txq->ring.sring, M_DEVBUF); 910 fail: 911 for (; q >= 0; q--) { 912 disconnect_txq(&info->txq[q]); 913 destroy_txq(&info->txq[q]); 914 } 915 916 free(info->txq, M_DEVBUF); 917 return (error); 918 } 919 920 static int 921 setup_device(device_t dev, struct netfront_info *info, 922 unsigned long num_queues) 923 { 924 int error; 925 int q; 926 927 if (info->txq) 928 destroy_txqs(info); 929 930 if (info->rxq) 931 destroy_rxqs(info); 932 933 info->num_queues = 0; 934 935 error = setup_rxqs(dev, info, num_queues); 936 if (error != 0) 937 goto out; 938 error = setup_txqs(dev, info, num_queues); 939 if (error != 0) 940 goto out; 941 942 info->num_queues = num_queues; 943 944 /* No split event channel at the moment. 
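	 * Both queues of a pair therefore share the interrupt allocated for
	 * the TX queue in setup_txqs(); the loop below copies that handle
	 * into the RX queue, and disconnect_rxq() only clears its copy
	 * rather than unbinding it.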
*/ 945 for (q = 0; q < num_queues; q++) 946 info->rxq[q].xen_intr_handle = info->txq[q].xen_intr_handle; 947 948 return (0); 949 950 out: 951 KASSERT(error != 0, ("Error path taken without providing an error code")); 952 return (error); 953 } 954 955 #ifdef INET 956 /** 957 * If this interface has an ipv4 address, send an arp for it. This 958 * helps to get the network going again after migrating hosts. 959 */ 960 static void 961 netfront_send_fake_arp(device_t dev, struct netfront_info *info) 962 { 963 struct ifnet *ifp; 964 struct ifaddr *ifa; 965 966 ifp = info->xn_ifp; 967 TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { 968 if (ifa->ifa_addr->sa_family == AF_INET) { 969 arp_ifinit(ifp, ifa); 970 } 971 } 972 } 973 #endif 974 975 /** 976 * Callback received when the backend's state changes. 977 */ 978 static void 979 netfront_backend_changed(device_t dev, XenbusState newstate) 980 { 981 struct netfront_info *sc = device_get_softc(dev); 982 983 DPRINTK("newstate=%d\n", newstate); 984 985 switch (newstate) { 986 case XenbusStateInitialising: 987 case XenbusStateInitialised: 988 case XenbusStateUnknown: 989 case XenbusStateClosed: 990 case XenbusStateReconfigured: 991 case XenbusStateReconfiguring: 992 break; 993 case XenbusStateInitWait: 994 if (xenbus_get_state(dev) != XenbusStateInitialising) 995 break; 996 if (xn_connect(sc) != 0) 997 break; 998 xenbus_set_state(dev, XenbusStateConnected); 999 break; 1000 case XenbusStateClosing: 1001 xenbus_set_state(dev, XenbusStateClosed); 1002 break; 1003 case XenbusStateConnected: 1004 #ifdef INET 1005 netfront_send_fake_arp(dev, sc); 1006 #endif 1007 break; 1008 } 1009 } 1010 1011 /** 1012 * \brief Verify that there is sufficient space in the Tx ring 1013 * buffer for a maximally sized request to be enqueued. 1014 * 1015 * A transmit request requires a transmit descriptor for each packet 1016 * fragment, plus up to 2 entries for "options" (e.g. TSO). 1017 */ 1018 static inline int 1019 xn_tx_slot_available(struct netfront_txq *txq) 1020 { 1021 1022 return (RING_FREE_REQUESTS(&txq->ring) > (MAX_TX_REQ_FRAGS + 2)); 1023 } 1024 1025 static void 1026 xn_release_tx_bufs(struct netfront_txq *txq) 1027 { 1028 int i; 1029 1030 for (i = 1; i <= NET_TX_RING_SIZE; i++) { 1031 struct mbuf *m; 1032 1033 m = txq->mbufs[i]; 1034 1035 /* 1036 * We assume that no kernel addresses are 1037 * less than NET_TX_RING_SIZE. Any entry 1038 * in the table that is below this number 1039 * must be an index from free-list tracking. 
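		 * (setup_txqs() initialises mbufs[i] to the small integer
		 * i + 1, chaining each free slot to the next.)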
		 */
		if (((uintptr_t)m) <= NET_TX_RING_SIZE)
			continue;
		gnttab_end_foreign_access_ref(txq->grant_ref[i]);
		gnttab_release_grant_reference(&txq->gref_head,
		    txq->grant_ref[i]);
		txq->grant_ref[i] = GRANT_REF_INVALID;
		add_id_to_freelist(txq->mbufs, i);
		txq->mbufs_cnt--;
		if (txq->mbufs_cnt < 0) {
			panic("%s: tx_chain_cnt must be >= 0", __func__);
		}
		m_free(m);
	}
}

static struct mbuf *
xn_alloc_one_rx_buffer(struct netfront_rxq *rxq)
{
	struct mbuf *m;

	m = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, MJUMPAGESIZE);
	if (m == NULL)
		return NULL;
	m->m_len = m->m_pkthdr.len = MJUMPAGESIZE;

	return (m);
}

static void
xn_alloc_rx_buffers(struct netfront_rxq *rxq)
{
	RING_IDX req_prod;
	int notify;

	XN_RX_LOCK_ASSERT(rxq);

	if (__predict_false(rxq->info->carrier == 0))
		return;

	for (req_prod = rxq->ring.req_prod_pvt;
	     req_prod - rxq->ring.rsp_cons < NET_RX_RING_SIZE;
	     req_prod++) {
		struct mbuf *m;
		unsigned short id;
		grant_ref_t ref;
		struct netif_rx_request *req;
		unsigned long pfn;

		m = xn_alloc_one_rx_buffer(rxq);
		if (m == NULL)
			break;

		id = xn_rxidx(req_prod);

		KASSERT(rxq->mbufs[id] == NULL, ("non-NULL xn_rx_chain"));
		rxq->mbufs[id] = m;

		ref = gnttab_claim_grant_reference(&rxq->gref_head);
		KASSERT(ref != GNTTAB_LIST_END,
		    ("reserved grant references exhausted"));
		rxq->grant_ref[id] = ref;

		pfn = atop(vtophys(mtod(m, vm_offset_t)));
		req = RING_GET_REQUEST(&rxq->ring, req_prod);

		gnttab_grant_foreign_access_ref(ref,
		    xenbus_get_otherend_id(rxq->info->xbdev), pfn, 0);
		req->id = id;
		req->gref = ref;
	}

	rxq->ring.req_prod_pvt = req_prod;

	/* Not enough requests? Try again later.
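	 * NET_RX_SLOTS_MIN is XEN_NETIF_NR_SLOTS_MIN + 1; if we could not
	 * post at least that many requests (typically because mbuf cluster
	 * allocation failed), the rx_refill callout retries in hz/10 ticks
	 * instead of notifying the backend about a nearly empty ring.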
*/ 1115 if (req_prod - rxq->ring.rsp_cons < NET_RX_SLOTS_MIN) { 1116 callout_reset(&rxq->rx_refill, hz/10, xn_alloc_rx_buffers_callout, 1117 rxq); 1118 return; 1119 } 1120 1121 wmb(); /* barrier so backend seens requests */ 1122 1123 RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&rxq->ring, notify); 1124 if (notify) 1125 xen_intr_signal(rxq->xen_intr_handle); 1126 } 1127 1128 static void xn_alloc_rx_buffers_callout(void *arg) 1129 { 1130 struct netfront_rxq *rxq; 1131 1132 rxq = (struct netfront_rxq *)arg; 1133 XN_RX_LOCK(rxq); 1134 xn_alloc_rx_buffers(rxq); 1135 XN_RX_UNLOCK(rxq); 1136 } 1137 1138 static void 1139 xn_release_rx_bufs(struct netfront_rxq *rxq) 1140 { 1141 int i, ref; 1142 struct mbuf *m; 1143 1144 for (i = 0; i < NET_RX_RING_SIZE; i++) { 1145 m = rxq->mbufs[i]; 1146 1147 if (m == NULL) 1148 continue; 1149 1150 ref = rxq->grant_ref[i]; 1151 if (ref == GRANT_REF_INVALID) 1152 continue; 1153 1154 gnttab_end_foreign_access_ref(ref); 1155 gnttab_release_grant_reference(&rxq->gref_head, ref); 1156 rxq->mbufs[i] = NULL; 1157 rxq->grant_ref[i] = GRANT_REF_INVALID; 1158 m_freem(m); 1159 } 1160 } 1161 1162 static void 1163 xn_rxeof(struct netfront_rxq *rxq) 1164 { 1165 struct ifnet *ifp; 1166 struct netfront_info *np = rxq->info; 1167 #if (defined(INET) || defined(INET6)) 1168 struct lro_ctrl *lro = &rxq->lro; 1169 #endif 1170 struct netfront_rx_info rinfo; 1171 struct netif_rx_response *rx = &rinfo.rx; 1172 struct netif_extra_info *extras = rinfo.extras; 1173 RING_IDX i, rp; 1174 struct mbuf *m; 1175 struct mbufq mbufq_rxq, mbufq_errq; 1176 int err, work_to_do; 1177 1178 do { 1179 XN_RX_LOCK_ASSERT(rxq); 1180 if (!netfront_carrier_ok(np)) 1181 return; 1182 1183 /* XXX: there should be some sane limit. */ 1184 mbufq_init(&mbufq_errq, INT_MAX); 1185 mbufq_init(&mbufq_rxq, INT_MAX); 1186 1187 ifp = np->xn_ifp; 1188 1189 rp = rxq->ring.sring->rsp_prod; 1190 rmb(); /* Ensure we see queued responses up to 'rp'. */ 1191 1192 i = rxq->ring.rsp_cons; 1193 while ((i != rp)) { 1194 memcpy(rx, RING_GET_RESPONSE(&rxq->ring, i), sizeof(*rx)); 1195 memset(extras, 0, sizeof(rinfo.extras)); 1196 1197 m = NULL; 1198 err = xn_get_responses(rxq, &rinfo, rp, &i, &m); 1199 1200 if (__predict_false(err)) { 1201 if (m) 1202 (void )mbufq_enqueue(&mbufq_errq, m); 1203 rxq->stats.rx_errors++; 1204 continue; 1205 } 1206 1207 m->m_pkthdr.rcvif = ifp; 1208 if ( rx->flags & NETRXF_data_validated ) { 1209 /* Tell the stack the checksums are okay */ 1210 /* 1211 * XXX this isn't necessarily the case - need to add 1212 * check 1213 */ 1214 1215 m->m_pkthdr.csum_flags |= 1216 (CSUM_IP_CHECKED | CSUM_IP_VALID | CSUM_DATA_VALID 1217 | CSUM_PSEUDO_HDR); 1218 m->m_pkthdr.csum_data = 0xffff; 1219 } 1220 if ((rx->flags & NETRXF_extra_info) != 0 && 1221 (extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].type == 1222 XEN_NETIF_EXTRA_TYPE_GSO)) { 1223 m->m_pkthdr.tso_segsz = 1224 extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].u.gso.size; 1225 m->m_pkthdr.csum_flags |= CSUM_TSO; 1226 } 1227 1228 rxq->stats.rx_packets++; 1229 rxq->stats.rx_bytes += m->m_pkthdr.len; 1230 1231 (void )mbufq_enqueue(&mbufq_rxq, m); 1232 rxq->ring.rsp_cons = i; 1233 } 1234 1235 mbufq_drain(&mbufq_errq); 1236 1237 /* 1238 * Process all the mbufs after the remapping is complete. 1239 * Break the mbuf chain first though. 1240 */ 1241 while ((m = mbufq_dequeue(&mbufq_rxq)) != NULL) { 1242 if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); 1243 1244 /* XXX: Do we really need to drop the rx lock? 
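			 * Dropping it lets if_input()/tcp_lro_rx() run
			 * without a driver lock held, at the cost of
			 * re-acquiring the lock for every packet in the
			 * batch.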
*/ 1245 XN_RX_UNLOCK(rxq); 1246 #if (defined(INET) || defined(INET6)) 1247 /* Use LRO if possible */ 1248 if ((ifp->if_capenable & IFCAP_LRO) == 0 || 1249 lro->lro_cnt == 0 || tcp_lro_rx(lro, m, 0)) { 1250 /* 1251 * If LRO fails, pass up to the stack 1252 * directly. 1253 */ 1254 (*ifp->if_input)(ifp, m); 1255 } 1256 #else 1257 (*ifp->if_input)(ifp, m); 1258 #endif 1259 1260 XN_RX_LOCK(rxq); 1261 } 1262 1263 rxq->ring.rsp_cons = i; 1264 1265 #if (defined(INET) || defined(INET6)) 1266 /* 1267 * Flush any outstanding LRO work 1268 */ 1269 tcp_lro_flush_all(lro); 1270 #endif 1271 1272 xn_alloc_rx_buffers(rxq); 1273 1274 RING_FINAL_CHECK_FOR_RESPONSES(&rxq->ring, work_to_do); 1275 } while (work_to_do); 1276 } 1277 1278 static void 1279 xn_txeof(struct netfront_txq *txq) 1280 { 1281 RING_IDX i, prod; 1282 unsigned short id; 1283 struct ifnet *ifp; 1284 netif_tx_response_t *txr; 1285 struct mbuf *m; 1286 struct netfront_info *np = txq->info; 1287 1288 XN_TX_LOCK_ASSERT(txq); 1289 1290 if (!netfront_carrier_ok(np)) 1291 return; 1292 1293 ifp = np->xn_ifp; 1294 1295 do { 1296 prod = txq->ring.sring->rsp_prod; 1297 rmb(); /* Ensure we see responses up to 'rp'. */ 1298 1299 for (i = txq->ring.rsp_cons; i != prod; i++) { 1300 txr = RING_GET_RESPONSE(&txq->ring, i); 1301 if (txr->status == NETIF_RSP_NULL) 1302 continue; 1303 1304 if (txr->status != NETIF_RSP_OKAY) { 1305 printf("%s: WARNING: response is %d!\n", 1306 __func__, txr->status); 1307 } 1308 id = txr->id; 1309 m = txq->mbufs[id]; 1310 KASSERT(m != NULL, ("mbuf not found in chain")); 1311 KASSERT((uintptr_t)m > NET_TX_RING_SIZE, 1312 ("mbuf already on the free list, but we're " 1313 "trying to free it again!")); 1314 M_ASSERTVALID(m); 1315 1316 /* 1317 * Increment packet count if this is the last 1318 * mbuf of the chain. 1319 */ 1320 if (!m->m_next) 1321 if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); 1322 if (__predict_false(gnttab_query_foreign_access( 1323 txq->grant_ref[id]) != 0)) { 1324 panic("%s: grant id %u still in use by the " 1325 "backend", __func__, id); 1326 } 1327 gnttab_end_foreign_access_ref(txq->grant_ref[id]); 1328 gnttab_release_grant_reference( 1329 &txq->gref_head, txq->grant_ref[id]); 1330 txq->grant_ref[id] = GRANT_REF_INVALID; 1331 1332 txq->mbufs[id] = NULL; 1333 add_id_to_freelist(txq->mbufs, id); 1334 txq->mbufs_cnt--; 1335 m_free(m); 1336 /* Only mark the txq active if we've freed up at least one slot to try */ 1337 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; 1338 } 1339 txq->ring.rsp_cons = prod; 1340 1341 /* 1342 * Set a new event, then check for race with update of 1343 * tx_cons. Note that it is essential to schedule a 1344 * callback, no matter how few buffers are pending. Even if 1345 * there is space in the transmit ring, higher layers may 1346 * be blocked because too much data is outstanding: in such 1347 * cases notification from Xen is likely to be the only kick 1348 * that we'll get. 
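		 * The new event threshold is placed roughly halfway between
		 * the responses already processed and the requests still
		 * outstanding, so one notification covers a batch of
		 * completions rather than every single one.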
1349 */ 1350 txq->ring.sring->rsp_event = 1351 prod + ((txq->ring.sring->req_prod - prod) >> 1) + 1; 1352 1353 mb(); 1354 } while (prod != txq->ring.sring->rsp_prod); 1355 1356 if (txq->full && 1357 ((txq->ring.sring->req_prod - prod) < NET_TX_RING_SIZE)) { 1358 txq->full = false; 1359 taskqueue_enqueue(txq->tq, &txq->intrtask); 1360 } 1361 } 1362 1363 1364 static void 1365 xn_rxq_intr(void *xrxq) 1366 { 1367 struct netfront_rxq *rxq = xrxq; 1368 1369 taskqueue_enqueue(rxq->tq, &rxq->intrtask); 1370 } 1371 1372 static void 1373 xn_txq_intr(void *xtxq) 1374 { 1375 struct netfront_txq *txq = xtxq; 1376 1377 taskqueue_enqueue(txq->tq, &txq->intrtask); 1378 } 1379 1380 static int 1381 xn_intr(void *xsc) 1382 { 1383 struct netfront_txq *txq = xsc; 1384 struct netfront_info *np = txq->info; 1385 struct netfront_rxq *rxq = &np->rxq[txq->id]; 1386 1387 /* kick both tx and rx */ 1388 xn_rxq_intr(rxq); 1389 xn_txq_intr(txq); 1390 1391 return (FILTER_HANDLED); 1392 } 1393 1394 static void 1395 xn_move_rx_slot(struct netfront_rxq *rxq, struct mbuf *m, 1396 grant_ref_t ref) 1397 { 1398 int new = xn_rxidx(rxq->ring.req_prod_pvt); 1399 1400 KASSERT(rxq->mbufs[new] == NULL, ("mbufs != NULL")); 1401 rxq->mbufs[new] = m; 1402 rxq->grant_ref[new] = ref; 1403 RING_GET_REQUEST(&rxq->ring, rxq->ring.req_prod_pvt)->id = new; 1404 RING_GET_REQUEST(&rxq->ring, rxq->ring.req_prod_pvt)->gref = ref; 1405 rxq->ring.req_prod_pvt++; 1406 } 1407 1408 static int 1409 xn_get_extras(struct netfront_rxq *rxq, 1410 struct netif_extra_info *extras, RING_IDX rp, RING_IDX *cons) 1411 { 1412 struct netif_extra_info *extra; 1413 1414 int err = 0; 1415 1416 do { 1417 struct mbuf *m; 1418 grant_ref_t ref; 1419 1420 if (__predict_false(*cons + 1 == rp)) { 1421 err = EINVAL; 1422 break; 1423 } 1424 1425 extra = (struct netif_extra_info *) 1426 RING_GET_RESPONSE(&rxq->ring, ++(*cons)); 1427 1428 if (__predict_false(!extra->type || 1429 extra->type >= XEN_NETIF_EXTRA_TYPE_MAX)) { 1430 err = EINVAL; 1431 } else { 1432 memcpy(&extras[extra->type - 1], extra, sizeof(*extra)); 1433 } 1434 1435 m = xn_get_rx_mbuf(rxq, *cons); 1436 ref = xn_get_rx_ref(rxq, *cons); 1437 xn_move_rx_slot(rxq, m, ref); 1438 } while (extra->flags & XEN_NETIF_EXTRA_FLAG_MORE); 1439 1440 return err; 1441 } 1442 1443 static int 1444 xn_get_responses(struct netfront_rxq *rxq, 1445 struct netfront_rx_info *rinfo, RING_IDX rp, RING_IDX *cons, 1446 struct mbuf **list) 1447 { 1448 struct netif_rx_response *rx = &rinfo->rx; 1449 struct netif_extra_info *extras = rinfo->extras; 1450 struct mbuf *m, *m0, *m_prev; 1451 grant_ref_t ref = xn_get_rx_ref(rxq, *cons); 1452 RING_IDX ref_cons = *cons; 1453 int frags = 1; 1454 int err = 0; 1455 u_long ret; 1456 1457 m0 = m = m_prev = xn_get_rx_mbuf(rxq, *cons); 1458 1459 if (rx->flags & NETRXF_extra_info) { 1460 err = xn_get_extras(rxq, extras, rp, cons); 1461 } 1462 1463 if (m0 != NULL) { 1464 m0->m_pkthdr.len = 0; 1465 m0->m_next = NULL; 1466 } 1467 1468 for (;;) { 1469 #if 0 1470 DPRINTK("rx->status=%hd rx->offset=%hu frags=%u\n", 1471 rx->status, rx->offset, frags); 1472 #endif 1473 if (__predict_false(rx->status < 0 || 1474 rx->offset + rx->status > PAGE_SIZE)) { 1475 1476 xn_move_rx_slot(rxq, m, ref); 1477 if (m0 == m) 1478 m0 = NULL; 1479 m = NULL; 1480 err = EINVAL; 1481 goto next_skip_queue; 1482 } 1483 1484 /* 1485 * This definitely indicates a bug, either in this driver or in 1486 * the backend driver. In future this should flag the bad 1487 * situation to the system controller to reboot the backed. 
1488 */ 1489 if (ref == GRANT_REF_INVALID) { 1490 printf("%s: Bad rx response id %d.\n", __func__, rx->id); 1491 err = EINVAL; 1492 goto next; 1493 } 1494 1495 ret = gnttab_end_foreign_access_ref(ref); 1496 KASSERT(ret, ("Unable to end access to grant references")); 1497 1498 gnttab_release_grant_reference(&rxq->gref_head, ref); 1499 1500 next: 1501 if (m == NULL) 1502 break; 1503 1504 m->m_len = rx->status; 1505 m->m_data += rx->offset; 1506 m0->m_pkthdr.len += rx->status; 1507 1508 next_skip_queue: 1509 if (!(rx->flags & NETRXF_more_data)) 1510 break; 1511 1512 if (*cons + frags == rp) { 1513 if (net_ratelimit()) 1514 WPRINTK("Need more frags\n"); 1515 err = ENOENT; 1516 printf("%s: cons %u frags %u rp %u, not enough frags\n", 1517 __func__, *cons, frags, rp); 1518 break; 1519 } 1520 /* 1521 * Note that m can be NULL, if rx->status < 0 or if 1522 * rx->offset + rx->status > PAGE_SIZE above. 1523 */ 1524 m_prev = m; 1525 1526 rx = RING_GET_RESPONSE(&rxq->ring, *cons + frags); 1527 m = xn_get_rx_mbuf(rxq, *cons + frags); 1528 1529 /* 1530 * m_prev == NULL can happen if rx->status < 0 or if 1531 * rx->offset + * rx->status > PAGE_SIZE above. 1532 */ 1533 if (m_prev != NULL) 1534 m_prev->m_next = m; 1535 1536 /* 1537 * m0 can be NULL if rx->status < 0 or if * rx->offset + 1538 * rx->status > PAGE_SIZE above. 1539 */ 1540 if (m0 == NULL) 1541 m0 = m; 1542 m->m_next = NULL; 1543 ref = xn_get_rx_ref(rxq, *cons + frags); 1544 ref_cons = *cons + frags; 1545 frags++; 1546 } 1547 *list = m0; 1548 *cons += frags; 1549 1550 return (err); 1551 } 1552 1553 /** 1554 * \brief Count the number of fragments in an mbuf chain. 1555 * 1556 * Surprisingly, there isn't an M* macro for this. 1557 */ 1558 static inline int 1559 xn_count_frags(struct mbuf *m) 1560 { 1561 int nfrags; 1562 1563 for (nfrags = 0; m != NULL; m = m->m_next) 1564 nfrags++; 1565 1566 return (nfrags); 1567 } 1568 1569 /** 1570 * Given an mbuf chain, make sure we have enough room and then push 1571 * it onto the transmit ring. 1572 */ 1573 static int 1574 xn_assemble_tx_request(struct netfront_txq *txq, struct mbuf *m_head) 1575 { 1576 struct mbuf *m; 1577 struct netfront_info *np = txq->info; 1578 struct ifnet *ifp = np->xn_ifp; 1579 u_int nfrags; 1580 int otherend_id; 1581 1582 /** 1583 * Defragment the mbuf if necessary. 1584 */ 1585 nfrags = xn_count_frags(m_head); 1586 1587 /* 1588 * Check to see whether this request is longer than netback 1589 * can handle, and try to defrag it. 1590 */ 1591 /** 1592 * It is a bit lame, but the netback driver in Linux can't 1593 * deal with nfrags > MAX_TX_REQ_FRAGS, which is a quirk of 1594 * the Linux network stack. 1595 */ 1596 if (nfrags > np->maxfrags) { 1597 m = m_defrag(m_head, M_NOWAIT); 1598 if (!m) { 1599 /* 1600 * Defrag failed, so free the mbuf and 1601 * therefore drop the packet. 1602 */ 1603 m_freem(m_head); 1604 return (EMSGSIZE); 1605 } 1606 m_head = m; 1607 } 1608 1609 /* Determine how many fragments now exist */ 1610 nfrags = xn_count_frags(m_head); 1611 1612 /* 1613 * Check to see whether the defragmented packet has too many 1614 * segments for the Linux netback driver. 1615 */ 1616 /** 1617 * The FreeBSD TCP stack, with TSO enabled, can produce a chain 1618 * of mbufs longer than Linux can handle. Make sure we don't 1619 * pass a too-long chain over to the other side by dropping the 1620 * packet. It doesn't look like there is currently a way to 1621 * tell the TCP stack to generate a shorter chain of packets. 
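	 * (if_hw_tsomaxsegcount is set to MAX_TX_REQ_FRAGS in
	 * create_netdev(), which should normally keep TSO chains within
	 * this limit; the check below is the backstop.)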
1622 */ 1623 if (nfrags > MAX_TX_REQ_FRAGS) { 1624 #ifdef DEBUG 1625 printf("%s: nfrags %d > MAX_TX_REQ_FRAGS %d, netback " 1626 "won't be able to handle it, dropping\n", 1627 __func__, nfrags, MAX_TX_REQ_FRAGS); 1628 #endif 1629 m_freem(m_head); 1630 return (EMSGSIZE); 1631 } 1632 1633 /* 1634 * This check should be redundant. We've already verified that we 1635 * have enough slots in the ring to handle a packet of maximum 1636 * size, and that our packet is less than the maximum size. Keep 1637 * it in here as an assert for now just to make certain that 1638 * chain_cnt is accurate. 1639 */ 1640 KASSERT((txq->mbufs_cnt + nfrags) <= NET_TX_RING_SIZE, 1641 ("%s: chain_cnt (%d) + nfrags (%d) > NET_TX_RING_SIZE " 1642 "(%d)!", __func__, (int) txq->mbufs_cnt, 1643 (int) nfrags, (int) NET_TX_RING_SIZE)); 1644 1645 /* 1646 * Start packing the mbufs in this chain into 1647 * the fragment pointers. Stop when we run out 1648 * of fragments or hit the end of the mbuf chain. 1649 */ 1650 m = m_head; 1651 otherend_id = xenbus_get_otherend_id(np->xbdev); 1652 for (m = m_head; m; m = m->m_next) { 1653 netif_tx_request_t *tx; 1654 uintptr_t id; 1655 grant_ref_t ref; 1656 u_long mfn; /* XXX Wrong type? */ 1657 1658 tx = RING_GET_REQUEST(&txq->ring, txq->ring.req_prod_pvt); 1659 id = get_id_from_freelist(txq->mbufs); 1660 if (id == 0) 1661 panic("%s: was allocated the freelist head!\n", 1662 __func__); 1663 txq->mbufs_cnt++; 1664 if (txq->mbufs_cnt > NET_TX_RING_SIZE) 1665 panic("%s: tx_chain_cnt must be <= NET_TX_RING_SIZE\n", 1666 __func__); 1667 txq->mbufs[id] = m; 1668 tx->id = id; 1669 ref = gnttab_claim_grant_reference(&txq->gref_head); 1670 KASSERT((short)ref >= 0, ("Negative ref")); 1671 mfn = virt_to_mfn(mtod(m, vm_offset_t)); 1672 gnttab_grant_foreign_access_ref(ref, otherend_id, 1673 mfn, GNTMAP_readonly); 1674 tx->gref = txq->grant_ref[id] = ref; 1675 tx->offset = mtod(m, vm_offset_t) & (PAGE_SIZE - 1); 1676 tx->flags = 0; 1677 if (m == m_head) { 1678 /* 1679 * The first fragment has the entire packet 1680 * size, subsequent fragments have just the 1681 * fragment size. The backend works out the 1682 * true size of the first fragment by 1683 * subtracting the sizes of the other 1684 * fragments. 1685 */ 1686 tx->size = m->m_pkthdr.len; 1687 1688 /* 1689 * The first fragment contains the checksum flags 1690 * and is optionally followed by extra data for 1691 * TSO etc. 1692 */ 1693 /** 1694 * CSUM_TSO requires checksum offloading. 1695 * Some versions of FreeBSD fail to 1696 * set CSUM_TCP in the CSUM_TSO case, 1697 * so we have to test for CSUM_TSO 1698 * explicitly. 
1699 */ 1700 if (m->m_pkthdr.csum_flags 1701 & (CSUM_DELAY_DATA | CSUM_TSO)) { 1702 tx->flags |= (NETTXF_csum_blank 1703 | NETTXF_data_validated); 1704 } 1705 if (m->m_pkthdr.csum_flags & CSUM_TSO) { 1706 struct netif_extra_info *gso = 1707 (struct netif_extra_info *) 1708 RING_GET_REQUEST(&txq->ring, 1709 ++txq->ring.req_prod_pvt); 1710 1711 tx->flags |= NETTXF_extra_info; 1712 1713 gso->u.gso.size = m->m_pkthdr.tso_segsz; 1714 gso->u.gso.type = 1715 XEN_NETIF_GSO_TYPE_TCPV4; 1716 gso->u.gso.pad = 0; 1717 gso->u.gso.features = 0; 1718 1719 gso->type = XEN_NETIF_EXTRA_TYPE_GSO; 1720 gso->flags = 0; 1721 } 1722 } else { 1723 tx->size = m->m_len; 1724 } 1725 if (m->m_next) 1726 tx->flags |= NETTXF_more_data; 1727 1728 txq->ring.req_prod_pvt++; 1729 } 1730 BPF_MTAP(ifp, m_head); 1731 1732 xn_txeof(txq); 1733 1734 txq->stats.tx_bytes += m_head->m_pkthdr.len; 1735 txq->stats.tx_packets++; 1736 1737 return (0); 1738 } 1739 1740 /* equivalent of network_open() in Linux */ 1741 static void 1742 xn_ifinit_locked(struct netfront_info *np) 1743 { 1744 struct ifnet *ifp; 1745 int i; 1746 struct netfront_rxq *rxq; 1747 1748 XN_LOCK_ASSERT(np); 1749 1750 ifp = np->xn_ifp; 1751 1752 if (ifp->if_drv_flags & IFF_DRV_RUNNING) 1753 return; 1754 1755 xn_stop(np); 1756 1757 for (i = 0; i < np->num_queues; i++) { 1758 rxq = &np->rxq[i]; 1759 XN_RX_LOCK(rxq); 1760 xn_alloc_rx_buffers(rxq); 1761 rxq->ring.sring->rsp_event = rxq->ring.rsp_cons + 1; 1762 if (RING_HAS_UNCONSUMED_RESPONSES(&rxq->ring)) 1763 taskqueue_enqueue(rxq->tq, &rxq->intrtask); 1764 XN_RX_UNLOCK(rxq); 1765 } 1766 1767 ifp->if_drv_flags |= IFF_DRV_RUNNING; 1768 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; 1769 if_link_state_change(ifp, LINK_STATE_UP); 1770 } 1771 1772 static void 1773 xn_ifinit(void *xsc) 1774 { 1775 struct netfront_info *sc = xsc; 1776 1777 XN_LOCK(sc); 1778 xn_ifinit_locked(sc); 1779 XN_UNLOCK(sc); 1780 } 1781 1782 static int 1783 xn_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) 1784 { 1785 struct netfront_info *sc = ifp->if_softc; 1786 struct ifreq *ifr = (struct ifreq *) data; 1787 #ifdef INET 1788 struct ifaddr *ifa = (struct ifaddr *)data; 1789 #endif 1790 1791 int mask, error = 0; 1792 switch(cmd) { 1793 case SIOCSIFADDR: 1794 #ifdef INET 1795 XN_LOCK(sc); 1796 if (ifa->ifa_addr->sa_family == AF_INET) { 1797 ifp->if_flags |= IFF_UP; 1798 if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) 1799 xn_ifinit_locked(sc); 1800 arp_ifinit(ifp, ifa); 1801 XN_UNLOCK(sc); 1802 } else { 1803 XN_UNLOCK(sc); 1804 #endif 1805 error = ether_ioctl(ifp, cmd, data); 1806 #ifdef INET 1807 } 1808 #endif 1809 break; 1810 case SIOCSIFMTU: 1811 ifp->if_mtu = ifr->ifr_mtu; 1812 ifp->if_drv_flags &= ~IFF_DRV_RUNNING; 1813 xn_ifinit(sc); 1814 break; 1815 case SIOCSIFFLAGS: 1816 XN_LOCK(sc); 1817 if (ifp->if_flags & IFF_UP) { 1818 /* 1819 * If only the state of the PROMISC flag changed, 1820 * then just use the 'set promisc mode' command 1821 * instead of reinitializing the entire NIC. Doing 1822 * a full re-init means reloading the firmware and 1823 * waiting for it to start up, which may take a 1824 * second or two. 
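			 * (The firmware caveat above is generic; netfront
			 * has no firmware to reload, so the IFF_UP case
			 * simply re-runs xn_ifinit_locked() below.)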
1825 */ 1826 xn_ifinit_locked(sc); 1827 } else { 1828 if (ifp->if_drv_flags & IFF_DRV_RUNNING) { 1829 xn_stop(sc); 1830 } 1831 } 1832 sc->xn_if_flags = ifp->if_flags; 1833 XN_UNLOCK(sc); 1834 break; 1835 case SIOCSIFCAP: 1836 mask = ifr->ifr_reqcap ^ ifp->if_capenable; 1837 if (mask & IFCAP_TXCSUM) { 1838 if (IFCAP_TXCSUM & ifp->if_capenable) { 1839 ifp->if_capenable &= ~(IFCAP_TXCSUM|IFCAP_TSO4); 1840 ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP 1841 | CSUM_IP | CSUM_TSO); 1842 } else { 1843 ifp->if_capenable |= IFCAP_TXCSUM; 1844 ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP 1845 | CSUM_IP); 1846 } 1847 } 1848 if (mask & IFCAP_RXCSUM) { 1849 ifp->if_capenable ^= IFCAP_RXCSUM; 1850 } 1851 if (mask & IFCAP_TSO4) { 1852 if (IFCAP_TSO4 & ifp->if_capenable) { 1853 ifp->if_capenable &= ~IFCAP_TSO4; 1854 ifp->if_hwassist &= ~CSUM_TSO; 1855 } else if (IFCAP_TXCSUM & ifp->if_capenable) { 1856 ifp->if_capenable |= IFCAP_TSO4; 1857 ifp->if_hwassist |= CSUM_TSO; 1858 } else { 1859 IPRINTK("Xen requires tx checksum offload" 1860 " be enabled to use TSO\n"); 1861 error = EINVAL; 1862 } 1863 } 1864 if (mask & IFCAP_LRO) { 1865 ifp->if_capenable ^= IFCAP_LRO; 1866 1867 } 1868 break; 1869 case SIOCADDMULTI: 1870 case SIOCDELMULTI: 1871 break; 1872 case SIOCSIFMEDIA: 1873 case SIOCGIFMEDIA: 1874 error = ifmedia_ioctl(ifp, ifr, &sc->sc_media, cmd); 1875 break; 1876 default: 1877 error = ether_ioctl(ifp, cmd, data); 1878 } 1879 1880 return (error); 1881 } 1882 1883 static void 1884 xn_stop(struct netfront_info *sc) 1885 { 1886 struct ifnet *ifp; 1887 1888 XN_LOCK_ASSERT(sc); 1889 1890 ifp = sc->xn_ifp; 1891 1892 ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); 1893 if_link_state_change(ifp, LINK_STATE_DOWN); 1894 } 1895 1896 static void 1897 xn_rebuild_rx_bufs(struct netfront_rxq *rxq) 1898 { 1899 int requeue_idx, i; 1900 grant_ref_t ref; 1901 netif_rx_request_t *req; 1902 1903 for (requeue_idx = 0, i = 0; i < NET_RX_RING_SIZE; i++) { 1904 struct mbuf *m; 1905 u_long pfn; 1906 1907 if (rxq->mbufs[i] == NULL) 1908 continue; 1909 1910 m = rxq->mbufs[requeue_idx] = xn_get_rx_mbuf(rxq, i); 1911 ref = rxq->grant_ref[requeue_idx] = xn_get_rx_ref(rxq, i); 1912 1913 req = RING_GET_REQUEST(&rxq->ring, requeue_idx); 1914 pfn = vtophys(mtod(m, vm_offset_t)) >> PAGE_SHIFT; 1915 1916 gnttab_grant_foreign_access_ref(ref, 1917 xenbus_get_otherend_id(rxq->info->xbdev), 1918 pfn, 0); 1919 1920 req->gref = ref; 1921 req->id = requeue_idx; 1922 1923 requeue_idx++; 1924 } 1925 1926 rxq->ring.req_prod_pvt = requeue_idx; 1927 } 1928 1929 /* START of Xenolinux helper functions adapted to FreeBSD */ 1930 int 1931 xn_connect(struct netfront_info *np) 1932 { 1933 int i, error; 1934 u_int feature_rx_copy; 1935 struct netfront_rxq *rxq; 1936 struct netfront_txq *txq; 1937 1938 error = xs_scanf(XST_NIL, xenbus_get_otherend_path(np->xbdev), 1939 "feature-rx-copy", NULL, "%u", &feature_rx_copy); 1940 if (error != 0) 1941 feature_rx_copy = 0; 1942 1943 /* We only support rx copy. */ 1944 if (!feature_rx_copy) 1945 return (EPROTONOSUPPORT); 1946 1947 /* Recovery procedure: */ 1948 error = talk_to_backend(np->xbdev, np); 1949 if (error != 0) 1950 return (error); 1951 1952 /* Step 1: Reinitialise variables. */ 1953 xn_query_features(np); 1954 xn_configure_features(np); 1955 1956 /* Step 2: Release TX buffer */ 1957 for (i = 0; i < np->num_queues; i++) { 1958 txq = &np->txq[i]; 1959 xn_release_tx_bufs(txq); 1960 } 1961 1962 /* Step 3: Rebuild the RX buffer freelist and the RX ring itself. 
 */
	for (i = 0; i < np->num_queues; i++) {
		rxq = &np->rxq[i];
		xn_rebuild_rx_bufs(rxq);
	}

	/* Step 4: All public and private state should now be sane.  Get
	 * ready to start sending and receiving packets and give the driver
	 * domain a kick because we've probably just requeued some
	 * packets.
	 */
	netfront_carrier_on(np);
	for (i = 0; i < np->num_queues; i++) {
		txq = &np->txq[i];
		rxq = &np->rxq[i];	/* the RX queue paired with this TX queue */
		xen_intr_signal(txq->xen_intr_handle);
		XN_TX_LOCK(txq);
		xn_txeof(txq);
		XN_TX_UNLOCK(txq);
		XN_RX_LOCK(rxq);
		xn_alloc_rx_buffers(rxq);
		XN_RX_UNLOCK(rxq);
	}

	return (0);
}

static void
xn_query_features(struct netfront_info *np)
{
	int val;

	device_printf(np->xbdev, "backend features:");

	if (xs_scanf(XST_NIL, xenbus_get_otherend_path(np->xbdev),
	    "feature-sg", NULL, "%d", &val) != 0)
		val = 0;

	np->maxfrags = 1;
	if (val) {
		np->maxfrags = MAX_TX_REQ_FRAGS;
		printf(" feature-sg");
	}

	if (xs_scanf(XST_NIL, xenbus_get_otherend_path(np->xbdev),
	    "feature-gso-tcpv4", NULL, "%d", &val) != 0)
		val = 0;

	np->xn_ifp->if_capabilities &= ~(IFCAP_TSO4|IFCAP_LRO);
	if (val) {
		np->xn_ifp->if_capabilities |= IFCAP_TSO4|IFCAP_LRO;
		printf(" feature-gso-tcpv4");
	}

	printf("\n");
}

static int
xn_configure_features(struct netfront_info *np)
{
	int err, cap_enabled;
#if (defined(INET) || defined(INET6))
	int i;
#endif

	err = 0;

	if (np->xn_resume &&
	    ((np->xn_ifp->if_capenable & np->xn_ifp->if_capabilities)
	    == np->xn_ifp->if_capenable)) {
		/* Current options are available, no need to do anything. */
		return (0);
	}

	/* Try to preserve as many options as possible.
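	 * Across a resume, cap_enabled records which capabilities were
	 * enabled before the migration, so features the user had turned off
	 * are not silently re-enabled below.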
*/ 2036 if (np->xn_resume) 2037 cap_enabled = np->xn_ifp->if_capenable; 2038 else 2039 cap_enabled = UINT_MAX; 2040 2041 #if (defined(INET) || defined(INET6)) 2042 for (i = 0; i < np->num_queues; i++) 2043 if ((np->xn_ifp->if_capenable & IFCAP_LRO) == 2044 (cap_enabled & IFCAP_LRO)) 2045 tcp_lro_free(&np->rxq[i].lro); 2046 #endif 2047 np->xn_ifp->if_capenable = 2048 np->xn_ifp->if_capabilities & ~(IFCAP_LRO|IFCAP_TSO4) & cap_enabled; 2049 np->xn_ifp->if_hwassist &= ~CSUM_TSO; 2050 #if (defined(INET) || defined(INET6)) 2051 for (i = 0; i < np->num_queues; i++) { 2052 if (xn_enable_lro && (np->xn_ifp->if_capabilities & IFCAP_LRO) == 2053 (cap_enabled & IFCAP_LRO)) { 2054 err = tcp_lro_init(&np->rxq[i].lro); 2055 if (err != 0) { 2056 device_printf(np->xbdev, "LRO initialization failed\n"); 2057 } else { 2058 np->rxq[i].lro.ifp = np->xn_ifp; 2059 np->xn_ifp->if_capenable |= IFCAP_LRO; 2060 } 2061 } 2062 } 2063 if ((np->xn_ifp->if_capabilities & IFCAP_TSO4) == 2064 (cap_enabled & IFCAP_TSO4)) { 2065 np->xn_ifp->if_capenable |= IFCAP_TSO4; 2066 np->xn_ifp->if_hwassist |= CSUM_TSO; 2067 } 2068 #endif 2069 return (err); 2070 } 2071 2072 static int 2073 xn_txq_mq_start_locked(struct netfront_txq *txq, struct mbuf *m) 2074 { 2075 struct netfront_info *np; 2076 struct ifnet *ifp; 2077 struct buf_ring *br; 2078 int error, notify; 2079 2080 np = txq->info; 2081 br = txq->br; 2082 ifp = np->xn_ifp; 2083 error = 0; 2084 2085 XN_TX_LOCK_ASSERT(txq); 2086 2087 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || 2088 !netfront_carrier_ok(np)) { 2089 if (m != NULL) 2090 error = drbr_enqueue(ifp, br, m); 2091 return (error); 2092 } 2093 2094 if (m != NULL) { 2095 error = drbr_enqueue(ifp, br, m); 2096 if (error != 0) 2097 return (error); 2098 } 2099 2100 while ((m = drbr_peek(ifp, br)) != NULL) { 2101 if (!xn_tx_slot_available(txq)) { 2102 drbr_putback(ifp, br, m); 2103 break; 2104 } 2105 2106 error = xn_assemble_tx_request(txq, m); 2107 /* xn_assemble_tx_request always consumes the mbuf*/ 2108 if (error != 0) { 2109 drbr_advance(ifp, br); 2110 break; 2111 } 2112 2113 RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&txq->ring, notify); 2114 if (notify) 2115 xen_intr_signal(txq->xen_intr_handle); 2116 2117 drbr_advance(ifp, br); 2118 } 2119 2120 if (RING_FULL(&txq->ring)) 2121 txq->full = true; 2122 2123 return (0); 2124 } 2125 2126 static int 2127 xn_txq_mq_start(struct ifnet *ifp, struct mbuf *m) 2128 { 2129 struct netfront_info *np; 2130 struct netfront_txq *txq; 2131 int i, npairs, error; 2132 2133 np = ifp->if_softc; 2134 npairs = np->num_queues; 2135 2136 /* check if flowid is set */ 2137 if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) 2138 i = m->m_pkthdr.flowid % npairs; 2139 else 2140 i = curcpu % npairs; 2141 2142 txq = &np->txq[i]; 2143 2144 if (XN_TX_TRYLOCK(txq) != 0) { 2145 error = xn_txq_mq_start_locked(txq, m); 2146 XN_TX_UNLOCK(txq); 2147 } else { 2148 error = drbr_enqueue(ifp, txq->br, m); 2149 taskqueue_enqueue(txq->tq, &txq->defrtask); 2150 } 2151 2152 return (error); 2153 } 2154 2155 static void 2156 xn_qflush(struct ifnet *ifp) 2157 { 2158 struct netfront_info *np; 2159 struct netfront_txq *txq; 2160 struct mbuf *m; 2161 int i; 2162 2163 np = ifp->if_softc; 2164 2165 for (i = 0; i < np->num_queues; i++) { 2166 txq = &np->txq[i]; 2167 2168 XN_TX_LOCK(txq); 2169 while ((m = buf_ring_dequeue_sc(txq->br)) != NULL) 2170 m_freem(m); 2171 XN_TX_UNLOCK(txq); 2172 } 2173 2174 if_qflush(ifp); 2175 } 2176 2177 /** 2178 * Create a network device. 2179 * @param dev Newbus device representing this virtual NIC. 
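 * @return 0 on success, otherwise an errno value (for example when the MAC
 *         address cannot be read from XenStore).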
2180 */ 2181 int 2182 create_netdev(device_t dev) 2183 { 2184 struct netfront_info *np; 2185 int err; 2186 struct ifnet *ifp; 2187 2188 np = device_get_softc(dev); 2189 2190 np->xbdev = dev; 2191 2192 mtx_init(&np->sc_lock, "xnsc", "netfront softc lock", MTX_DEF); 2193 2194 ifmedia_init(&np->sc_media, 0, xn_ifmedia_upd, xn_ifmedia_sts); 2195 ifmedia_add(&np->sc_media, IFM_ETHER|IFM_MANUAL, 0, NULL); 2196 ifmedia_set(&np->sc_media, IFM_ETHER|IFM_MANUAL); 2197 2198 err = xen_net_read_mac(dev, np->mac); 2199 if (err != 0) 2200 goto error; 2201 2202 /* Set up ifnet structure */ 2203 ifp = np->xn_ifp = if_alloc(IFT_ETHER); 2204 ifp->if_softc = np; 2205 if_initname(ifp, "xn", device_get_unit(dev)); 2206 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; 2207 ifp->if_ioctl = xn_ioctl; 2208 2209 ifp->if_transmit = xn_txq_mq_start; 2210 ifp->if_qflush = xn_qflush; 2211 2212 ifp->if_init = xn_ifinit; 2213 2214 ifp->if_hwassist = XN_CSUM_FEATURES; 2215 ifp->if_capabilities = IFCAP_HWCSUM; 2216 ifp->if_hw_tsomax = 65536 - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN); 2217 ifp->if_hw_tsomaxsegcount = MAX_TX_REQ_FRAGS; 2218 ifp->if_hw_tsomaxsegsize = PAGE_SIZE; 2219 2220 ether_ifattach(ifp, np->mac); 2221 netfront_carrier_off(np); 2222 2223 return (0); 2224 2225 error: 2226 KASSERT(err != 0, ("Error path with no error code specified")); 2227 return (err); 2228 } 2229 2230 static int 2231 netfront_detach(device_t dev) 2232 { 2233 struct netfront_info *info = device_get_softc(dev); 2234 2235 DPRINTK("%s\n", xenbus_get_node(dev)); 2236 2237 netif_free(info); 2238 2239 return 0; 2240 } 2241 2242 static void 2243 netif_free(struct netfront_info *np) 2244 { 2245 2246 XN_LOCK(np); 2247 xn_stop(np); 2248 XN_UNLOCK(np); 2249 netif_disconnect_backend(np); 2250 free(np->rxq, M_DEVBUF); 2251 free(np->txq, M_DEVBUF); 2252 ether_ifdetach(np->xn_ifp); 2253 if_free(np->xn_ifp); 2254 np->xn_ifp = NULL; 2255 ifmedia_removeall(&np->sc_media); 2256 } 2257 2258 static void 2259 netif_disconnect_backend(struct netfront_info *np) 2260 { 2261 u_int i; 2262 2263 for (i = 0; i < np->num_queues; i++) { 2264 XN_RX_LOCK(&np->rxq[i]); 2265 XN_TX_LOCK(&np->txq[i]); 2266 } 2267 netfront_carrier_off(np); 2268 for (i = 0; i < np->num_queues; i++) { 2269 XN_RX_UNLOCK(&np->rxq[i]); 2270 XN_TX_UNLOCK(&np->txq[i]); 2271 } 2272 2273 for (i = 0; i < np->num_queues; i++) { 2274 disconnect_rxq(&np->rxq[i]); 2275 disconnect_txq(&np->txq[i]); 2276 } 2277 } 2278 2279 static int 2280 xn_ifmedia_upd(struct ifnet *ifp) 2281 { 2282 2283 return (0); 2284 } 2285 2286 static void 2287 xn_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr) 2288 { 2289 2290 ifmr->ifm_status = IFM_AVALID|IFM_ACTIVE; 2291 ifmr->ifm_active = IFM_ETHER|IFM_MANUAL; 2292 } 2293 2294 /* ** Driver registration ** */ 2295 static device_method_t netfront_methods[] = { 2296 /* Device interface */ 2297 DEVMETHOD(device_probe, netfront_probe), 2298 DEVMETHOD(device_attach, netfront_attach), 2299 DEVMETHOD(device_detach, netfront_detach), 2300 DEVMETHOD(device_shutdown, bus_generic_shutdown), 2301 DEVMETHOD(device_suspend, netfront_suspend), 2302 DEVMETHOD(device_resume, netfront_resume), 2303 2304 /* Xenbus interface */ 2305 DEVMETHOD(xenbus_otherend_changed, netfront_backend_changed), 2306 2307 DEVMETHOD_END 2308 }; 2309 2310 static driver_t netfront_driver = { 2311 "xn", 2312 netfront_methods, 2313 sizeof(struct netfront_info), 2314 }; 2315 devclass_t netfront_devclass; 2316 2317 DRIVER_MODULE(xe, xenbusb_front, netfront_driver, netfront_devclass, NULL, 2318 NULL); 2319