1 /*- 2 * Copyright (c) 2004-2006 Kip Macy 3 * Copyright (c) 2015 Wei Liu <wei.liu2@citrix.com> 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28 #include <sys/cdefs.h> 29 __FBSDID("$FreeBSD$"); 30 31 #include "opt_inet.h" 32 #include "opt_inet6.h" 33 34 #include <sys/param.h> 35 #include <sys/sockio.h> 36 #include <sys/limits.h> 37 #include <sys/mbuf.h> 38 #include <sys/malloc.h> 39 #include <sys/module.h> 40 #include <sys/kernel.h> 41 #include <sys/socket.h> 42 #include <sys/sysctl.h> 43 #include <sys/taskqueue.h> 44 45 #include <net/if.h> 46 #include <net/if_var.h> 47 #include <net/if_arp.h> 48 #include <net/ethernet.h> 49 #include <net/if_media.h> 50 #include <net/bpf.h> 51 #include <net/if_types.h> 52 53 #include <netinet/in.h> 54 #include <netinet/ip.h> 55 #include <netinet/if_ether.h> 56 #include <netinet/tcp.h> 57 #include <netinet/tcp_lro.h> 58 59 #include <vm/vm.h> 60 #include <vm/pmap.h> 61 62 #include <sys/bus.h> 63 64 #include <xen/xen-os.h> 65 #include <xen/hypervisor.h> 66 #include <xen/xen_intr.h> 67 #include <xen/gnttab.h> 68 #include <xen/interface/memory.h> 69 #include <xen/interface/io/netif.h> 70 #include <xen/xenbus/xenbusvar.h> 71 72 #include "xenbus_if.h" 73 74 /* Features supported by all backends. TSO and LRO can be negotiated */ 75 #define XN_CSUM_FEATURES (CSUM_TCP | CSUM_UDP) 76 77 #define NET_TX_RING_SIZE __RING_SIZE((netif_tx_sring_t *)0, PAGE_SIZE) 78 #define NET_RX_RING_SIZE __RING_SIZE((netif_rx_sring_t *)0, PAGE_SIZE) 79 80 /* 81 * Should the driver do LRO on the RX end 82 * this can be toggled on the fly, but the 83 * interface must be reset (down/up) for it 84 * to take effect. 85 */ 86 static int xn_enable_lro = 1; 87 TUNABLE_INT("hw.xn.enable_lro", &xn_enable_lro); 88 89 /* 90 * Number of pairs of queues. 91 */ 92 static unsigned long xn_num_queues = 4; 93 TUNABLE_ULONG("hw.xn.num_queues", &xn_num_queues); 94 95 /** 96 * \brief The maximum allowed data fragments in a single transmit 97 * request. 98 * 99 * This limit is imposed by the backend driver. We assume here that 100 * we are dealing with a Linux driver domain and have set our limit 101 * to mirror the Linux MAX_SKB_FRAGS constant. 102 */ 103 #define MAX_TX_REQ_FRAGS (65536 / PAGE_SIZE + 2) 104 105 #define RX_COPY_THRESHOLD 256 106 107 #define net_ratelimit() 0 108 109 struct netfront_rxq; 110 struct netfront_txq; 111 struct netfront_info; 112 struct netfront_rx_info; 113 114 static void xn_txeof(struct netfront_txq *); 115 static void xn_rxeof(struct netfront_rxq *); 116 static void xn_alloc_rx_buffers(struct netfront_rxq *); 117 118 static void xn_release_rx_bufs(struct netfront_rxq *); 119 static void xn_release_tx_bufs(struct netfront_txq *); 120 121 static void xn_rxq_intr(void *); 122 static void xn_txq_intr(void *); 123 static int xn_intr(void *); 124 static inline int xn_count_frags(struct mbuf *m); 125 static int xn_assemble_tx_request(struct netfront_txq *, struct mbuf *); 126 static int xn_ioctl(struct ifnet *, u_long, caddr_t); 127 static void xn_ifinit_locked(struct netfront_info *); 128 static void xn_ifinit(void *); 129 static void xn_stop(struct netfront_info *); 130 static void xn_query_features(struct netfront_info *np); 131 static int xn_configure_features(struct netfront_info *np); 132 static void netif_free(struct netfront_info *info); 133 static int netfront_detach(device_t dev); 134 135 static int xn_txq_mq_start_locked(struct netfront_txq *, struct mbuf *); 136 static int xn_txq_mq_start(struct ifnet *, struct mbuf *); 137 138 static int talk_to_backend(device_t dev, struct netfront_info *info); 139 static int create_netdev(device_t dev); 140 static void netif_disconnect_backend(struct netfront_info *info); 141 static int setup_device(device_t dev, struct netfront_info *info, 142 unsigned long); 143 static int xn_ifmedia_upd(struct ifnet *ifp); 144 static void xn_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr); 145 146 int xn_connect(struct netfront_info *); 147 148 static int xn_get_responses(struct netfront_rxq *, 149 struct netfront_rx_info *, RING_IDX, RING_IDX *, 150 struct mbuf **); 151 152 #define virt_to_mfn(x) (vtophys(x) >> PAGE_SHIFT) 153 154 #define INVALID_P2M_ENTRY (~0UL) 155 156 struct xn_rx_stats 157 { 158 u_long rx_packets; /* total packets received */ 159 u_long rx_bytes; /* total bytes received */ 160 u_long rx_errors; /* bad packets received */ 161 }; 162 163 struct xn_tx_stats 164 { 165 u_long tx_packets; /* total packets transmitted */ 166 u_long tx_bytes; /* total bytes transmitted */ 167 u_long tx_errors; /* packet transmit problems */ 168 }; 169 170 #define XN_QUEUE_NAME_LEN 8 /* xn{t,r}x_%u, allow for two digits */ 171 struct netfront_rxq { 172 struct netfront_info *info; 173 u_int id; 174 char name[XN_QUEUE_NAME_LEN]; 175 struct mtx lock; 176 177 int ring_ref; 178 netif_rx_front_ring_t ring; 179 xen_intr_handle_t xen_intr_handle; 180 181 grant_ref_t gref_head; 182 grant_ref_t grant_ref[NET_TX_RING_SIZE + 1]; 183 184 struct mbuf *mbufs[NET_RX_RING_SIZE + 1]; 185 struct mbufq batch; /* batch queue */ 186 int target; 187 188 xen_pfn_t pfn_array[NET_RX_RING_SIZE]; 189 190 struct lro_ctrl lro; 191 192 struct taskqueue *tq; 193 struct task intrtask; 194 195 struct xn_rx_stats stats; 196 }; 197 198 struct netfront_txq { 199 struct netfront_info *info; 200 u_int id; 201 char name[XN_QUEUE_NAME_LEN]; 202 struct mtx lock; 203 204 int ring_ref; 205 netif_tx_front_ring_t ring; 206 xen_intr_handle_t xen_intr_handle; 207 208 grant_ref_t gref_head; 209 grant_ref_t grant_ref[NET_TX_RING_SIZE + 1]; 210 211 struct mbuf *mbufs[NET_TX_RING_SIZE + 1]; 212 int mbufs_cnt; 213 struct buf_ring *br; 214 215 struct taskqueue *tq; 216 struct task intrtask; 217 struct task defrtask; 218 219 bool full; 220 221 struct xn_tx_stats stats; 222 }; 223 224 struct netfront_info { 225 struct ifnet *xn_ifp; 226 227 struct mtx sc_lock; 228 229 u_int num_queues; 230 struct netfront_rxq *rxq; 231 struct netfront_txq *txq; 232 233 u_int carrier; 234 u_int maxfrags; 235 236 /* Receive-ring batched refills. */ 237 #define RX_MIN_TARGET 32 238 #define RX_MAX_TARGET NET_RX_RING_SIZE 239 int rx_min_target; 240 int rx_max_target; 241 242 device_t xbdev; 243 uint8_t mac[ETHER_ADDR_LEN]; 244 245 int xn_if_flags; 246 247 struct ifmedia sc_media; 248 249 bool xn_resume; 250 }; 251 252 struct netfront_rx_info { 253 struct netif_rx_response rx; 254 struct netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1]; 255 }; 256 257 #define XN_RX_LOCK(_q) mtx_lock(&(_q)->lock) 258 #define XN_RX_UNLOCK(_q) mtx_unlock(&(_q)->lock) 259 260 #define XN_TX_LOCK(_q) mtx_lock(&(_q)->lock) 261 #define XN_TX_TRYLOCK(_q) mtx_trylock(&(_q)->lock) 262 #define XN_TX_UNLOCK(_q) mtx_unlock(&(_q)->lock) 263 264 #define XN_LOCK(_sc) mtx_lock(&(_sc)->sc_lock); 265 #define XN_UNLOCK(_sc) mtx_unlock(&(_sc)->sc_lock); 266 267 #define XN_LOCK_ASSERT(_sc) mtx_assert(&(_sc)->sc_lock, MA_OWNED); 268 #define XN_RX_LOCK_ASSERT(_q) mtx_assert(&(_q)->lock, MA_OWNED); 269 #define XN_TX_LOCK_ASSERT(_q) mtx_assert(&(_q)->lock, MA_OWNED); 270 271 #define netfront_carrier_on(netif) ((netif)->carrier = 1) 272 #define netfront_carrier_off(netif) ((netif)->carrier = 0) 273 #define netfront_carrier_ok(netif) ((netif)->carrier) 274 275 /* Access macros for acquiring freeing slots in xn_free_{tx,rx}_idxs[]. */ 276 277 static inline void 278 add_id_to_freelist(struct mbuf **list, uintptr_t id) 279 { 280 281 KASSERT(id != 0, 282 ("%s: the head item (0) must always be free.", __func__)); 283 list[id] = list[0]; 284 list[0] = (struct mbuf *)id; 285 } 286 287 static inline unsigned short 288 get_id_from_freelist(struct mbuf **list) 289 { 290 uintptr_t id; 291 292 id = (uintptr_t)list[0]; 293 KASSERT(id != 0, 294 ("%s: the head item (0) must always remain free.", __func__)); 295 list[0] = list[id]; 296 return (id); 297 } 298 299 static inline int 300 xn_rxidx(RING_IDX idx) 301 { 302 303 return idx & (NET_RX_RING_SIZE - 1); 304 } 305 306 static inline struct mbuf * 307 xn_get_rx_mbuf(struct netfront_rxq *rxq, RING_IDX ri) 308 { 309 int i; 310 struct mbuf *m; 311 312 i = xn_rxidx(ri); 313 m = rxq->mbufs[i]; 314 rxq->mbufs[i] = NULL; 315 return (m); 316 } 317 318 static inline grant_ref_t 319 xn_get_rx_ref(struct netfront_rxq *rxq, RING_IDX ri) 320 { 321 int i = xn_rxidx(ri); 322 grant_ref_t ref = rxq->grant_ref[i]; 323 324 KASSERT(ref != GRANT_REF_INVALID, ("Invalid grant reference!\n")); 325 rxq->grant_ref[i] = GRANT_REF_INVALID; 326 return (ref); 327 } 328 329 #define IPRINTK(fmt, args...) \ 330 printf("[XEN] " fmt, ##args) 331 #ifdef INVARIANTS 332 #define WPRINTK(fmt, args...) \ 333 printf("[XEN] " fmt, ##args) 334 #else 335 #define WPRINTK(fmt, args...) 336 #endif 337 #ifdef DEBUG 338 #define DPRINTK(fmt, args...) \ 339 printf("[XEN] %s: " fmt, __func__, ##args) 340 #else 341 #define DPRINTK(fmt, args...) 342 #endif 343 344 /** 345 * Read the 'mac' node at the given device's node in the store, and parse that 346 * as colon-separated octets, placing result the given mac array. mac must be 347 * a preallocated array of length ETH_ALEN (as declared in linux/if_ether.h). 348 * Return 0 on success, or errno on error. 349 */ 350 static int 351 xen_net_read_mac(device_t dev, uint8_t mac[]) 352 { 353 int error, i; 354 char *s, *e, *macstr; 355 const char *path; 356 357 path = xenbus_get_node(dev); 358 error = xs_read(XST_NIL, path, "mac", NULL, (void **) &macstr); 359 if (error == ENOENT) { 360 /* 361 * Deal with missing mac XenStore nodes on devices with 362 * HVM emulation (the 'ioemu' configuration attribute) 363 * enabled. 364 * 365 * The HVM emulator may execute in a stub device model 366 * domain which lacks the permission, only given to Dom0, 367 * to update the guest's XenStore tree. For this reason, 368 * the HVM emulator doesn't even attempt to write the 369 * front-side mac node, even when operating in Dom0. 370 * However, there should always be a mac listed in the 371 * backend tree. Fallback to this version if our query 372 * of the front side XenStore location doesn't find 373 * anything. 374 */ 375 path = xenbus_get_otherend_path(dev); 376 error = xs_read(XST_NIL, path, "mac", NULL, (void **) &macstr); 377 } 378 if (error != 0) { 379 xenbus_dev_fatal(dev, error, "parsing %s/mac", path); 380 return (error); 381 } 382 383 s = macstr; 384 for (i = 0; i < ETHER_ADDR_LEN; i++) { 385 mac[i] = strtoul(s, &e, 16); 386 if (s == e || (e[0] != ':' && e[0] != 0)) { 387 free(macstr, M_XENBUS); 388 return (ENOENT); 389 } 390 s = &e[1]; 391 } 392 free(macstr, M_XENBUS); 393 return (0); 394 } 395 396 /** 397 * Entry point to this code when a new device is created. Allocate the basic 398 * structures and the ring buffers for communication with the backend, and 399 * inform the backend of the appropriate details for those. Switch to 400 * Connected state. 401 */ 402 static int 403 netfront_probe(device_t dev) 404 { 405 406 if (xen_hvm_domain() && xen_disable_pv_nics != 0) 407 return (ENXIO); 408 409 if (!strcmp(xenbus_get_type(dev), "vif")) { 410 device_set_desc(dev, "Virtual Network Interface"); 411 return (0); 412 } 413 414 return (ENXIO); 415 } 416 417 static int 418 netfront_attach(device_t dev) 419 { 420 int err; 421 422 err = create_netdev(dev); 423 if (err != 0) { 424 xenbus_dev_fatal(dev, err, "creating netdev"); 425 return (err); 426 } 427 428 SYSCTL_ADD_INT(device_get_sysctl_ctx(dev), 429 SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), 430 OID_AUTO, "enable_lro", CTLFLAG_RW, 431 &xn_enable_lro, 0, "Large Receive Offload"); 432 433 SYSCTL_ADD_ULONG(device_get_sysctl_ctx(dev), 434 SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), 435 OID_AUTO, "num_queues", CTLFLAG_RD, 436 &xn_num_queues, "Number of pairs of queues"); 437 438 return (0); 439 } 440 441 static int 442 netfront_suspend(device_t dev) 443 { 444 struct netfront_info *np = device_get_softc(dev); 445 u_int i; 446 447 for (i = 0; i < np->num_queues; i++) { 448 XN_RX_LOCK(&np->rxq[i]); 449 XN_TX_LOCK(&np->txq[i]); 450 } 451 netfront_carrier_off(np); 452 for (i = 0; i < np->num_queues; i++) { 453 XN_RX_UNLOCK(&np->rxq[i]); 454 XN_TX_UNLOCK(&np->txq[i]); 455 } 456 return (0); 457 } 458 459 /** 460 * We are reconnecting to the backend, due to a suspend/resume, or a backend 461 * driver restart. We tear down our netif structure and recreate it, but 462 * leave the device-layer structures intact so that this is transparent to the 463 * rest of the kernel. 464 */ 465 static int 466 netfront_resume(device_t dev) 467 { 468 struct netfront_info *info = device_get_softc(dev); 469 470 info->xn_resume = true; 471 netif_disconnect_backend(info); 472 return (0); 473 } 474 475 static int 476 write_queue_xenstore_keys(device_t dev, 477 struct netfront_rxq *rxq, 478 struct netfront_txq *txq, 479 struct xs_transaction *xst, bool hierarchy) 480 { 481 int err; 482 const char *message; 483 const char *node = xenbus_get_node(dev); 484 char *path; 485 size_t path_size; 486 487 KASSERT(rxq->id == txq->id, ("Mismatch between RX and TX queue ids")); 488 /* Split event channel support is not yet there. */ 489 KASSERT(rxq->xen_intr_handle == txq->xen_intr_handle, 490 ("Split event channels are not supported")); 491 492 if (hierarchy) { 493 path_size = strlen(node) + 10; 494 path = malloc(path_size, M_DEVBUF, M_WAITOK|M_ZERO); 495 snprintf(path, path_size, "%s/queue-%u", node, rxq->id); 496 } else { 497 path_size = strlen(node) + 1; 498 path = malloc(path_size, M_DEVBUF, M_WAITOK|M_ZERO); 499 snprintf(path, path_size, "%s", node); 500 } 501 502 err = xs_printf(*xst, path, "tx-ring-ref","%u", txq->ring_ref); 503 if (err != 0) { 504 message = "writing tx ring-ref"; 505 goto error; 506 } 507 err = xs_printf(*xst, path, "rx-ring-ref","%u", rxq->ring_ref); 508 if (err != 0) { 509 message = "writing rx ring-ref"; 510 goto error; 511 } 512 err = xs_printf(*xst, path, "event-channel", "%u", 513 xen_intr_port(rxq->xen_intr_handle)); 514 if (err != 0) { 515 message = "writing event-channel"; 516 goto error; 517 } 518 519 free(path, M_DEVBUF); 520 521 return (0); 522 523 error: 524 free(path, M_DEVBUF); 525 xenbus_dev_fatal(dev, err, "%s", message); 526 527 return (err); 528 } 529 530 /* Common code used when first setting up, and when resuming. */ 531 static int 532 talk_to_backend(device_t dev, struct netfront_info *info) 533 { 534 const char *message; 535 struct xs_transaction xst; 536 const char *node = xenbus_get_node(dev); 537 int err; 538 unsigned long num_queues, max_queues = 0; 539 unsigned int i; 540 541 err = xen_net_read_mac(dev, info->mac); 542 if (err != 0) { 543 xenbus_dev_fatal(dev, err, "parsing %s/mac", node); 544 goto out; 545 } 546 547 err = xs_scanf(XST_NIL, xenbus_get_otherend_path(info->xbdev), 548 "multi-queue-max-queues", NULL, "%lu", &max_queues); 549 if (err != 0) 550 max_queues = 1; 551 num_queues = xn_num_queues; 552 if (num_queues > max_queues) 553 num_queues = max_queues; 554 555 err = setup_device(dev, info, num_queues); 556 if (err != 0) 557 goto out; 558 559 again: 560 err = xs_transaction_start(&xst); 561 if (err != 0) { 562 xenbus_dev_fatal(dev, err, "starting transaction"); 563 goto free; 564 } 565 566 if (info->num_queues == 1) { 567 err = write_queue_xenstore_keys(dev, &info->rxq[0], 568 &info->txq[0], &xst, false); 569 if (err != 0) 570 goto abort_transaction_no_def_error; 571 } else { 572 err = xs_printf(xst, node, "multi-queue-num-queues", 573 "%u", info->num_queues); 574 if (err != 0) { 575 message = "writing multi-queue-num-queues"; 576 goto abort_transaction; 577 } 578 579 for (i = 0; i < info->num_queues; i++) { 580 err = write_queue_xenstore_keys(dev, &info->rxq[i], 581 &info->txq[i], &xst, true); 582 if (err != 0) 583 goto abort_transaction_no_def_error; 584 } 585 } 586 587 err = xs_printf(xst, node, "request-rx-copy", "%u", 1); 588 if (err != 0) { 589 message = "writing request-rx-copy"; 590 goto abort_transaction; 591 } 592 err = xs_printf(xst, node, "feature-rx-notify", "%d", 1); 593 if (err != 0) { 594 message = "writing feature-rx-notify"; 595 goto abort_transaction; 596 } 597 err = xs_printf(xst, node, "feature-sg", "%d", 1); 598 if (err != 0) { 599 message = "writing feature-sg"; 600 goto abort_transaction; 601 } 602 err = xs_printf(xst, node, "feature-gso-tcpv4", "%d", 1); 603 if (err != 0) { 604 message = "writing feature-gso-tcpv4"; 605 goto abort_transaction; 606 } 607 608 err = xs_transaction_end(xst, 0); 609 if (err != 0) { 610 if (err == EAGAIN) 611 goto again; 612 xenbus_dev_fatal(dev, err, "completing transaction"); 613 goto free; 614 } 615 616 return 0; 617 618 abort_transaction: 619 xenbus_dev_fatal(dev, err, "%s", message); 620 abort_transaction_no_def_error: 621 xs_transaction_end(xst, 1); 622 free: 623 netif_free(info); 624 out: 625 return (err); 626 } 627 628 static void 629 xn_rxq_tq_intr(void *xrxq, int pending) 630 { 631 struct netfront_rxq *rxq = xrxq; 632 633 XN_RX_LOCK(rxq); 634 xn_rxeof(rxq); 635 XN_RX_UNLOCK(rxq); 636 } 637 638 static void 639 xn_txq_start(struct netfront_txq *txq) 640 { 641 struct netfront_info *np = txq->info; 642 struct ifnet *ifp = np->xn_ifp; 643 644 XN_TX_LOCK_ASSERT(txq); 645 if (!drbr_empty(ifp, txq->br)) 646 xn_txq_mq_start_locked(txq, NULL); 647 } 648 649 static void 650 xn_txq_tq_intr(void *xtxq, int pending) 651 { 652 struct netfront_txq *txq = xtxq; 653 654 XN_TX_LOCK(txq); 655 if (RING_HAS_UNCONSUMED_RESPONSES(&txq->ring)) 656 xn_txeof(txq); 657 xn_txq_start(txq); 658 XN_TX_UNLOCK(txq); 659 } 660 661 static void 662 xn_txq_tq_deferred(void *xtxq, int pending) 663 { 664 struct netfront_txq *txq = xtxq; 665 666 XN_TX_LOCK(txq); 667 xn_txq_start(txq); 668 XN_TX_UNLOCK(txq); 669 } 670 671 static void 672 disconnect_rxq(struct netfront_rxq *rxq) 673 { 674 675 xn_release_rx_bufs(rxq); 676 gnttab_free_grant_references(rxq->gref_head); 677 gnttab_end_foreign_access_ref(rxq->ring_ref); 678 /* 679 * No split event channel support at the moment, handle will 680 * be unbound in tx. So no need to call xen_intr_unbind here, 681 * but we do want to reset the handler to 0. 682 */ 683 rxq->xen_intr_handle = 0; 684 } 685 686 static void 687 destroy_rxq(struct netfront_rxq *rxq) 688 { 689 690 free(rxq->ring.sring, M_DEVBUF); 691 taskqueue_drain_all(rxq->tq); 692 taskqueue_free(rxq->tq); 693 } 694 695 static void 696 destroy_rxqs(struct netfront_info *np) 697 { 698 int i; 699 700 for (i = 0; i < np->num_queues; i++) 701 destroy_rxq(&np->rxq[i]); 702 703 free(np->rxq, M_DEVBUF); 704 np->rxq = NULL; 705 } 706 707 static int 708 setup_rxqs(device_t dev, struct netfront_info *info, 709 unsigned long num_queues) 710 { 711 int q, i; 712 int error; 713 netif_rx_sring_t *rxs; 714 struct netfront_rxq *rxq; 715 716 info->rxq = malloc(sizeof(struct netfront_rxq) * num_queues, 717 M_DEVBUF, M_WAITOK|M_ZERO); 718 719 for (q = 0; q < num_queues; q++) { 720 rxq = &info->rxq[q]; 721 722 rxq->id = q; 723 rxq->info = info; 724 rxq->target = RX_MIN_TARGET; 725 rxq->ring_ref = GRANT_REF_INVALID; 726 rxq->ring.sring = NULL; 727 snprintf(rxq->name, XN_QUEUE_NAME_LEN, "xnrx_%u", q); 728 mtx_init(&rxq->lock, rxq->name, "netfront receive lock", 729 MTX_DEF); 730 731 for (i = 0; i <= NET_RX_RING_SIZE; i++) { 732 rxq->mbufs[i] = NULL; 733 rxq->grant_ref[i] = GRANT_REF_INVALID; 734 } 735 736 mbufq_init(&rxq->batch, INT_MAX); 737 738 /* Start resources allocation */ 739 740 if (gnttab_alloc_grant_references(RX_MAX_TARGET, 741 &rxq->gref_head) != 0) { 742 device_printf(dev, "allocating rx gref"); 743 error = ENOMEM; 744 goto fail; 745 } 746 747 rxs = (netif_rx_sring_t *)malloc(PAGE_SIZE, M_DEVBUF, 748 M_WAITOK|M_ZERO); 749 SHARED_RING_INIT(rxs); 750 FRONT_RING_INIT(&rxq->ring, rxs, PAGE_SIZE); 751 752 error = xenbus_grant_ring(dev, virt_to_mfn(rxs), 753 &rxq->ring_ref); 754 if (error != 0) { 755 device_printf(dev, "granting rx ring page"); 756 goto fail_grant_ring; 757 } 758 759 TASK_INIT(&rxq->intrtask, 0, xn_rxq_tq_intr, rxq); 760 rxq->tq = taskqueue_create_fast(rxq->name, M_WAITOK, 761 taskqueue_thread_enqueue, &rxq->tq); 762 763 error = taskqueue_start_threads(&rxq->tq, 1, PI_NET, 764 "%s rxq %d", device_get_nameunit(dev), rxq->id); 765 if (error != 0) { 766 device_printf(dev, "failed to start rx taskq %d\n", 767 rxq->id); 768 goto fail_start_thread; 769 } 770 } 771 772 return (0); 773 774 fail_start_thread: 775 gnttab_end_foreign_access_ref(rxq->ring_ref); 776 taskqueue_drain_all(rxq->tq); 777 taskqueue_free(rxq->tq); 778 fail_grant_ring: 779 gnttab_free_grant_references(rxq->gref_head); 780 free(rxq->ring.sring, M_DEVBUF); 781 fail: 782 for (; q >= 0; q--) { 783 disconnect_rxq(&info->rxq[q]); 784 destroy_rxq(&info->rxq[q]); 785 } 786 787 free(info->rxq, M_DEVBUF); 788 return (error); 789 } 790 791 static void 792 disconnect_txq(struct netfront_txq *txq) 793 { 794 795 xn_release_tx_bufs(txq); 796 gnttab_free_grant_references(txq->gref_head); 797 gnttab_end_foreign_access_ref(txq->ring_ref); 798 xen_intr_unbind(&txq->xen_intr_handle); 799 } 800 801 static void 802 destroy_txq(struct netfront_txq *txq) 803 { 804 805 free(txq->ring.sring, M_DEVBUF); 806 buf_ring_free(txq->br, M_DEVBUF); 807 taskqueue_drain_all(txq->tq); 808 taskqueue_free(txq->tq); 809 } 810 811 static void 812 destroy_txqs(struct netfront_info *np) 813 { 814 int i; 815 816 for (i = 0; i < np->num_queues; i++) 817 destroy_txq(&np->txq[i]); 818 819 free(np->txq, M_DEVBUF); 820 np->txq = NULL; 821 } 822 823 static int 824 setup_txqs(device_t dev, struct netfront_info *info, 825 unsigned long num_queues) 826 { 827 int q, i; 828 int error; 829 netif_tx_sring_t *txs; 830 struct netfront_txq *txq; 831 832 info->txq = malloc(sizeof(struct netfront_txq) * num_queues, 833 M_DEVBUF, M_WAITOK|M_ZERO); 834 835 for (q = 0; q < num_queues; q++) { 836 txq = &info->txq[q]; 837 838 txq->id = q; 839 txq->info = info; 840 841 txq->ring_ref = GRANT_REF_INVALID; 842 txq->ring.sring = NULL; 843 844 snprintf(txq->name, XN_QUEUE_NAME_LEN, "xntx_%u", q); 845 846 mtx_init(&txq->lock, txq->name, "netfront transmit lock", 847 MTX_DEF); 848 849 for (i = 0; i <= NET_TX_RING_SIZE; i++) { 850 txq->mbufs[i] = (void *) ((u_long) i+1); 851 txq->grant_ref[i] = GRANT_REF_INVALID; 852 } 853 txq->mbufs[NET_TX_RING_SIZE] = (void *)0; 854 855 /* Start resources allocation. */ 856 857 if (gnttab_alloc_grant_references(NET_TX_RING_SIZE, 858 &txq->gref_head) != 0) { 859 device_printf(dev, "failed to allocate tx grant refs\n"); 860 error = ENOMEM; 861 goto fail; 862 } 863 864 txs = (netif_tx_sring_t *)malloc(PAGE_SIZE, M_DEVBUF, 865 M_WAITOK|M_ZERO); 866 SHARED_RING_INIT(txs); 867 FRONT_RING_INIT(&txq->ring, txs, PAGE_SIZE); 868 869 error = xenbus_grant_ring(dev, virt_to_mfn(txs), 870 &txq->ring_ref); 871 if (error != 0) { 872 device_printf(dev, "failed to grant tx ring\n"); 873 goto fail_grant_ring; 874 } 875 876 txq->br = buf_ring_alloc(NET_TX_RING_SIZE, M_DEVBUF, 877 M_WAITOK, &txq->lock); 878 TASK_INIT(&txq->defrtask, 0, xn_txq_tq_deferred, txq); 879 TASK_INIT(&txq->intrtask, 0, xn_txq_tq_intr, txq); 880 881 txq->tq = taskqueue_create_fast(txq->name, M_WAITOK, 882 taskqueue_thread_enqueue, &txq->tq); 883 884 error = taskqueue_start_threads(&txq->tq, 1, PI_NET, 885 "%s txq %d", device_get_nameunit(dev), txq->id); 886 if (error != 0) { 887 device_printf(dev, "failed to start tx taskq %d\n", 888 txq->id); 889 goto fail_start_thread; 890 } 891 892 error = xen_intr_alloc_and_bind_local_port(dev, 893 xenbus_get_otherend_id(dev), xn_intr, /* handler */ NULL, 894 &info->txq[q], 895 INTR_TYPE_NET | INTR_MPSAFE | INTR_ENTROPY, 896 &txq->xen_intr_handle); 897 898 if (error != 0) { 899 device_printf(dev, "xen_intr_alloc_and_bind_local_port failed\n"); 900 goto fail_bind_port; 901 } 902 } 903 904 return (0); 905 906 fail_bind_port: 907 taskqueue_drain_all(txq->tq); 908 fail_start_thread: 909 buf_ring_free(txq->br, M_DEVBUF); 910 taskqueue_free(txq->tq); 911 gnttab_end_foreign_access_ref(txq->ring_ref); 912 fail_grant_ring: 913 gnttab_free_grant_references(txq->gref_head); 914 free(txq->ring.sring, M_DEVBUF); 915 fail: 916 for (; q >= 0; q--) { 917 disconnect_txq(&info->txq[q]); 918 destroy_txq(&info->txq[q]); 919 } 920 921 free(info->txq, M_DEVBUF); 922 return (error); 923 } 924 925 static int 926 setup_device(device_t dev, struct netfront_info *info, 927 unsigned long num_queues) 928 { 929 int error; 930 int q; 931 932 if (info->txq) 933 destroy_txqs(info); 934 935 if (info->rxq) 936 destroy_rxqs(info); 937 938 info->num_queues = 0; 939 940 error = setup_rxqs(dev, info, num_queues); 941 if (error != 0) 942 goto out; 943 error = setup_txqs(dev, info, num_queues); 944 if (error != 0) 945 goto out; 946 947 info->num_queues = num_queues; 948 949 /* No split event channel at the moment. */ 950 for (q = 0; q < num_queues; q++) 951 info->rxq[q].xen_intr_handle = info->txq[q].xen_intr_handle; 952 953 return (0); 954 955 out: 956 KASSERT(error != 0, ("Error path taken without providing an error code")); 957 return (error); 958 } 959 960 #ifdef INET 961 /** 962 * If this interface has an ipv4 address, send an arp for it. This 963 * helps to get the network going again after migrating hosts. 964 */ 965 static void 966 netfront_send_fake_arp(device_t dev, struct netfront_info *info) 967 { 968 struct ifnet *ifp; 969 struct ifaddr *ifa; 970 971 ifp = info->xn_ifp; 972 TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { 973 if (ifa->ifa_addr->sa_family == AF_INET) { 974 arp_ifinit(ifp, ifa); 975 } 976 } 977 } 978 #endif 979 980 /** 981 * Callback received when the backend's state changes. 982 */ 983 static void 984 netfront_backend_changed(device_t dev, XenbusState newstate) 985 { 986 struct netfront_info *sc = device_get_softc(dev); 987 988 DPRINTK("newstate=%d\n", newstate); 989 990 switch (newstate) { 991 case XenbusStateInitialising: 992 case XenbusStateInitialised: 993 case XenbusStateUnknown: 994 case XenbusStateClosed: 995 case XenbusStateReconfigured: 996 case XenbusStateReconfiguring: 997 break; 998 case XenbusStateInitWait: 999 if (xenbus_get_state(dev) != XenbusStateInitialising) 1000 break; 1001 if (xn_connect(sc) != 0) 1002 break; 1003 xenbus_set_state(dev, XenbusStateConnected); 1004 break; 1005 case XenbusStateClosing: 1006 xenbus_set_state(dev, XenbusStateClosed); 1007 break; 1008 case XenbusStateConnected: 1009 #ifdef INET 1010 netfront_send_fake_arp(dev, sc); 1011 #endif 1012 break; 1013 } 1014 } 1015 1016 /** 1017 * \brief Verify that there is sufficient space in the Tx ring 1018 * buffer for a maximally sized request to be enqueued. 1019 * 1020 * A transmit request requires a transmit descriptor for each packet 1021 * fragment, plus up to 2 entries for "options" (e.g. TSO). 1022 */ 1023 static inline int 1024 xn_tx_slot_available(struct netfront_txq *txq) 1025 { 1026 1027 return (RING_FREE_REQUESTS(&txq->ring) > (MAX_TX_REQ_FRAGS + 2)); 1028 } 1029 1030 static void 1031 xn_release_tx_bufs(struct netfront_txq *txq) 1032 { 1033 int i; 1034 1035 for (i = 1; i <= NET_TX_RING_SIZE; i++) { 1036 struct mbuf *m; 1037 1038 m = txq->mbufs[i]; 1039 1040 /* 1041 * We assume that no kernel addresses are 1042 * less than NET_TX_RING_SIZE. Any entry 1043 * in the table that is below this number 1044 * must be an index from free-list tracking. 1045 */ 1046 if (((uintptr_t)m) <= NET_TX_RING_SIZE) 1047 continue; 1048 gnttab_end_foreign_access_ref(txq->grant_ref[i]); 1049 gnttab_release_grant_reference(&txq->gref_head, 1050 txq->grant_ref[i]); 1051 txq->grant_ref[i] = GRANT_REF_INVALID; 1052 add_id_to_freelist(txq->mbufs, i); 1053 txq->mbufs_cnt--; 1054 if (txq->mbufs_cnt < 0) { 1055 panic("%s: tx_chain_cnt must be >= 0", __func__); 1056 } 1057 m_free(m); 1058 } 1059 } 1060 1061 static void 1062 xn_alloc_rx_buffers(struct netfront_rxq *rxq) 1063 { 1064 struct netfront_info *np = rxq->info; 1065 int otherend_id = xenbus_get_otherend_id(np->xbdev); 1066 unsigned short id; 1067 struct mbuf *m_new; 1068 int i, batch_target, notify; 1069 RING_IDX req_prod; 1070 grant_ref_t ref; 1071 netif_rx_request_t *req; 1072 vm_offset_t vaddr; 1073 u_long pfn; 1074 1075 req_prod = rxq->ring.req_prod_pvt; 1076 1077 if (__predict_false(np->carrier == 0)) 1078 return; 1079 1080 /* 1081 * Allocate mbufs greedily, even though we batch updates to the 1082 * receive ring. This creates a less bursty demand on the memory 1083 * allocator, and so should reduce the chance of failed allocation 1084 * requests both for ourself and for other kernel subsystems. 1085 * 1086 * Here we attempt to maintain rx_target buffers in flight, counting 1087 * buffers that we have yet to process in the receive ring. 1088 */ 1089 batch_target = rxq->target - (req_prod - rxq->ring.rsp_cons); 1090 for (i = mbufq_len(&rxq->batch); i < batch_target; i++) { 1091 m_new = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, MJUMPAGESIZE); 1092 if (m_new == NULL) { 1093 if (i != 0) 1094 goto refill; 1095 /* XXX set timer */ 1096 break; 1097 } 1098 m_new->m_len = m_new->m_pkthdr.len = MJUMPAGESIZE; 1099 1100 /* queue the mbufs allocated */ 1101 mbufq_enqueue(&rxq->batch, m_new); 1102 } 1103 1104 /* 1105 * If we've allocated at least half of our target number of entries, 1106 * submit them to the backend - we have enough to make the overhead 1107 * of submission worthwhile. Otherwise wait for more mbufs and 1108 * request entries to become available. 1109 */ 1110 if (i < (rxq->target/2)) { 1111 if (req_prod > rxq->ring.sring->req_prod) 1112 goto push; 1113 return; 1114 } 1115 1116 /* 1117 * Double floating fill target if we risked having the backend 1118 * run out of empty buffers for receive traffic. We define "running 1119 * low" as having less than a fourth of our target buffers free 1120 * at the time we refilled the queue. 1121 */ 1122 if ((req_prod - rxq->ring.sring->rsp_prod) < (rxq->target / 4)) { 1123 rxq->target *= 2; 1124 if (rxq->target > np->rx_max_target) 1125 rxq->target = np->rx_max_target; 1126 } 1127 1128 refill: 1129 for (i = 0; ; i++) { 1130 if ((m_new = mbufq_dequeue(&rxq->batch)) == NULL) 1131 break; 1132 1133 m_new->m_ext.ext_arg1 = (vm_paddr_t *)(uintptr_t)( 1134 vtophys(m_new->m_ext.ext_buf) >> PAGE_SHIFT); 1135 1136 id = xn_rxidx(req_prod + i); 1137 1138 KASSERT(rxq->mbufs[id] == NULL, ("non-NULL xn_rx_chain")); 1139 rxq->mbufs[id] = m_new; 1140 1141 ref = gnttab_claim_grant_reference(&rxq->gref_head); 1142 KASSERT(ref != GNTTAB_LIST_END, 1143 ("reserved grant references exhuasted")); 1144 rxq->grant_ref[id] = ref; 1145 1146 vaddr = mtod(m_new, vm_offset_t); 1147 pfn = vtophys(vaddr) >> PAGE_SHIFT; 1148 req = RING_GET_REQUEST(&rxq->ring, req_prod + i); 1149 1150 gnttab_grant_foreign_access_ref(ref, otherend_id, pfn, 0); 1151 req->id = id; 1152 req->gref = ref; 1153 1154 rxq->pfn_array[i] = 1155 vtophys(mtod(m_new,vm_offset_t)) >> PAGE_SHIFT; 1156 } 1157 1158 KASSERT(i, ("no mbufs processed")); /* should have returned earlier */ 1159 KASSERT(mbufq_len(&rxq->batch) == 0, ("not all mbufs processed")); 1160 /* 1161 * We may have allocated buffers which have entries outstanding 1162 * in the page * update queue -- make sure we flush those first! 1163 */ 1164 wmb(); 1165 1166 /* Above is a suitable barrier to ensure backend will see requests. */ 1167 rxq->ring.req_prod_pvt = req_prod + i; 1168 push: 1169 RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&rxq->ring, notify); 1170 if (notify) 1171 xen_intr_signal(rxq->xen_intr_handle); 1172 } 1173 1174 static void 1175 xn_release_rx_bufs(struct netfront_rxq *rxq) 1176 { 1177 int i, ref; 1178 struct mbuf *m; 1179 1180 for (i = 0; i < NET_RX_RING_SIZE; i++) { 1181 m = rxq->mbufs[i]; 1182 1183 if (m == NULL) 1184 continue; 1185 1186 ref = rxq->grant_ref[i]; 1187 if (ref == GRANT_REF_INVALID) 1188 continue; 1189 1190 gnttab_end_foreign_access_ref(ref); 1191 gnttab_release_grant_reference(&rxq->gref_head, ref); 1192 rxq->mbufs[i] = NULL; 1193 rxq->grant_ref[i] = GRANT_REF_INVALID; 1194 m_freem(m); 1195 } 1196 } 1197 1198 static void 1199 xn_rxeof(struct netfront_rxq *rxq) 1200 { 1201 struct ifnet *ifp; 1202 struct netfront_info *np = rxq->info; 1203 #if (defined(INET) || defined(INET6)) 1204 struct lro_ctrl *lro = &rxq->lro; 1205 #endif 1206 struct netfront_rx_info rinfo; 1207 struct netif_rx_response *rx = &rinfo.rx; 1208 struct netif_extra_info *extras = rinfo.extras; 1209 RING_IDX i, rp; 1210 struct mbuf *m; 1211 struct mbufq mbufq_rxq, mbufq_errq; 1212 int err, work_to_do; 1213 1214 do { 1215 XN_RX_LOCK_ASSERT(rxq); 1216 if (!netfront_carrier_ok(np)) 1217 return; 1218 1219 /* XXX: there should be some sane limit. */ 1220 mbufq_init(&mbufq_errq, INT_MAX); 1221 mbufq_init(&mbufq_rxq, INT_MAX); 1222 1223 ifp = np->xn_ifp; 1224 1225 rp = rxq->ring.sring->rsp_prod; 1226 rmb(); /* Ensure we see queued responses up to 'rp'. */ 1227 1228 i = rxq->ring.rsp_cons; 1229 while ((i != rp)) { 1230 memcpy(rx, RING_GET_RESPONSE(&rxq->ring, i), sizeof(*rx)); 1231 memset(extras, 0, sizeof(rinfo.extras)); 1232 1233 m = NULL; 1234 err = xn_get_responses(rxq, &rinfo, rp, &i, &m); 1235 1236 if (__predict_false(err)) { 1237 if (m) 1238 (void )mbufq_enqueue(&mbufq_errq, m); 1239 rxq->stats.rx_errors++; 1240 continue; 1241 } 1242 1243 m->m_pkthdr.rcvif = ifp; 1244 if ( rx->flags & NETRXF_data_validated ) { 1245 /* Tell the stack the checksums are okay */ 1246 /* 1247 * XXX this isn't necessarily the case - need to add 1248 * check 1249 */ 1250 1251 m->m_pkthdr.csum_flags |= 1252 (CSUM_IP_CHECKED | CSUM_IP_VALID | CSUM_DATA_VALID 1253 | CSUM_PSEUDO_HDR); 1254 m->m_pkthdr.csum_data = 0xffff; 1255 } 1256 1257 rxq->stats.rx_packets++; 1258 rxq->stats.rx_bytes += m->m_pkthdr.len; 1259 1260 (void )mbufq_enqueue(&mbufq_rxq, m); 1261 rxq->ring.rsp_cons = i; 1262 } 1263 1264 mbufq_drain(&mbufq_errq); 1265 1266 /* 1267 * Process all the mbufs after the remapping is complete. 1268 * Break the mbuf chain first though. 1269 */ 1270 while ((m = mbufq_dequeue(&mbufq_rxq)) != NULL) { 1271 if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); 1272 1273 /* XXX: Do we really need to drop the rx lock? */ 1274 XN_RX_UNLOCK(rxq); 1275 #if (defined(INET) || defined(INET6)) 1276 /* Use LRO if possible */ 1277 if ((ifp->if_capenable & IFCAP_LRO) == 0 || 1278 lro->lro_cnt == 0 || tcp_lro_rx(lro, m, 0)) { 1279 /* 1280 * If LRO fails, pass up to the stack 1281 * directly. 1282 */ 1283 (*ifp->if_input)(ifp, m); 1284 } 1285 #else 1286 (*ifp->if_input)(ifp, m); 1287 #endif 1288 1289 XN_RX_LOCK(rxq); 1290 } 1291 1292 rxq->ring.rsp_cons = i; 1293 1294 #if (defined(INET) || defined(INET6)) 1295 /* 1296 * Flush any outstanding LRO work 1297 */ 1298 tcp_lro_flush_all(lro); 1299 #endif 1300 1301 xn_alloc_rx_buffers(rxq); 1302 1303 RING_FINAL_CHECK_FOR_RESPONSES(&rxq->ring, work_to_do); 1304 } while (work_to_do); 1305 } 1306 1307 static void 1308 xn_txeof(struct netfront_txq *txq) 1309 { 1310 RING_IDX i, prod; 1311 unsigned short id; 1312 struct ifnet *ifp; 1313 netif_tx_response_t *txr; 1314 struct mbuf *m; 1315 struct netfront_info *np = txq->info; 1316 1317 XN_TX_LOCK_ASSERT(txq); 1318 1319 if (!netfront_carrier_ok(np)) 1320 return; 1321 1322 ifp = np->xn_ifp; 1323 1324 do { 1325 prod = txq->ring.sring->rsp_prod; 1326 rmb(); /* Ensure we see responses up to 'rp'. */ 1327 1328 for (i = txq->ring.rsp_cons; i != prod; i++) { 1329 txr = RING_GET_RESPONSE(&txq->ring, i); 1330 if (txr->status == NETIF_RSP_NULL) 1331 continue; 1332 1333 if (txr->status != NETIF_RSP_OKAY) { 1334 printf("%s: WARNING: response is %d!\n", 1335 __func__, txr->status); 1336 } 1337 id = txr->id; 1338 m = txq->mbufs[id]; 1339 KASSERT(m != NULL, ("mbuf not found in chain")); 1340 KASSERT((uintptr_t)m > NET_TX_RING_SIZE, 1341 ("mbuf already on the free list, but we're " 1342 "trying to free it again!")); 1343 M_ASSERTVALID(m); 1344 1345 /* 1346 * Increment packet count if this is the last 1347 * mbuf of the chain. 1348 */ 1349 if (!m->m_next) 1350 if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); 1351 if (__predict_false(gnttab_query_foreign_access( 1352 txq->grant_ref[id]) != 0)) { 1353 panic("%s: grant id %u still in use by the " 1354 "backend", __func__, id); 1355 } 1356 gnttab_end_foreign_access_ref(txq->grant_ref[id]); 1357 gnttab_release_grant_reference( 1358 &txq->gref_head, txq->grant_ref[id]); 1359 txq->grant_ref[id] = GRANT_REF_INVALID; 1360 1361 txq->mbufs[id] = NULL; 1362 add_id_to_freelist(txq->mbufs, id); 1363 txq->mbufs_cnt--; 1364 m_free(m); 1365 /* Only mark the txq active if we've freed up at least one slot to try */ 1366 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; 1367 } 1368 txq->ring.rsp_cons = prod; 1369 1370 /* 1371 * Set a new event, then check for race with update of 1372 * tx_cons. Note that it is essential to schedule a 1373 * callback, no matter how few buffers are pending. Even if 1374 * there is space in the transmit ring, higher layers may 1375 * be blocked because too much data is outstanding: in such 1376 * cases notification from Xen is likely to be the only kick 1377 * that we'll get. 1378 */ 1379 txq->ring.sring->rsp_event = 1380 prod + ((txq->ring.sring->req_prod - prod) >> 1) + 1; 1381 1382 mb(); 1383 } while (prod != txq->ring.sring->rsp_prod); 1384 1385 if (txq->full && 1386 ((txq->ring.sring->req_prod - prod) < NET_TX_RING_SIZE)) { 1387 txq->full = false; 1388 taskqueue_enqueue(txq->tq, &txq->intrtask); 1389 } 1390 } 1391 1392 1393 static void 1394 xn_rxq_intr(void *xrxq) 1395 { 1396 struct netfront_rxq *rxq = xrxq; 1397 1398 taskqueue_enqueue(rxq->tq, &rxq->intrtask); 1399 } 1400 1401 static void 1402 xn_txq_intr(void *xtxq) 1403 { 1404 struct netfront_txq *txq = xtxq; 1405 1406 taskqueue_enqueue(txq->tq, &txq->intrtask); 1407 } 1408 1409 static int 1410 xn_intr(void *xsc) 1411 { 1412 struct netfront_txq *txq = xsc; 1413 struct netfront_info *np = txq->info; 1414 struct netfront_rxq *rxq = &np->rxq[txq->id]; 1415 1416 /* kick both tx and rx */ 1417 xn_rxq_intr(rxq); 1418 xn_txq_intr(txq); 1419 1420 return (FILTER_HANDLED); 1421 } 1422 1423 static void 1424 xn_move_rx_slot(struct netfront_rxq *rxq, struct mbuf *m, 1425 grant_ref_t ref) 1426 { 1427 int new = xn_rxidx(rxq->ring.req_prod_pvt); 1428 1429 KASSERT(rxq->mbufs[new] == NULL, ("mbufs != NULL")); 1430 rxq->mbufs[new] = m; 1431 rxq->grant_ref[new] = ref; 1432 RING_GET_REQUEST(&rxq->ring, rxq->ring.req_prod_pvt)->id = new; 1433 RING_GET_REQUEST(&rxq->ring, rxq->ring.req_prod_pvt)->gref = ref; 1434 rxq->ring.req_prod_pvt++; 1435 } 1436 1437 static int 1438 xn_get_extras(struct netfront_rxq *rxq, 1439 struct netif_extra_info *extras, RING_IDX rp, RING_IDX *cons) 1440 { 1441 struct netif_extra_info *extra; 1442 1443 int err = 0; 1444 1445 do { 1446 struct mbuf *m; 1447 grant_ref_t ref; 1448 1449 if (__predict_false(*cons + 1 == rp)) { 1450 err = EINVAL; 1451 break; 1452 } 1453 1454 extra = (struct netif_extra_info *) 1455 RING_GET_RESPONSE(&rxq->ring, ++(*cons)); 1456 1457 if (__predict_false(!extra->type || 1458 extra->type >= XEN_NETIF_EXTRA_TYPE_MAX)) { 1459 err = EINVAL; 1460 } else { 1461 memcpy(&extras[extra->type - 1], extra, sizeof(*extra)); 1462 } 1463 1464 m = xn_get_rx_mbuf(rxq, *cons); 1465 ref = xn_get_rx_ref(rxq, *cons); 1466 xn_move_rx_slot(rxq, m, ref); 1467 } while (extra->flags & XEN_NETIF_EXTRA_FLAG_MORE); 1468 1469 return err; 1470 } 1471 1472 static int 1473 xn_get_responses(struct netfront_rxq *rxq, 1474 struct netfront_rx_info *rinfo, RING_IDX rp, RING_IDX *cons, 1475 struct mbuf **list) 1476 { 1477 struct netif_rx_response *rx = &rinfo->rx; 1478 struct netif_extra_info *extras = rinfo->extras; 1479 struct mbuf *m, *m0, *m_prev; 1480 grant_ref_t ref = xn_get_rx_ref(rxq, *cons); 1481 RING_IDX ref_cons = *cons; 1482 int frags = 1; 1483 int err = 0; 1484 u_long ret; 1485 1486 m0 = m = m_prev = xn_get_rx_mbuf(rxq, *cons); 1487 1488 if (rx->flags & NETRXF_extra_info) { 1489 err = xn_get_extras(rxq, extras, rp, cons); 1490 } 1491 1492 if (m0 != NULL) { 1493 m0->m_pkthdr.len = 0; 1494 m0->m_next = NULL; 1495 } 1496 1497 for (;;) { 1498 #if 0 1499 DPRINTK("rx->status=%hd rx->offset=%hu frags=%u\n", 1500 rx->status, rx->offset, frags); 1501 #endif 1502 if (__predict_false(rx->status < 0 || 1503 rx->offset + rx->status > PAGE_SIZE)) { 1504 1505 xn_move_rx_slot(rxq, m, ref); 1506 if (m0 == m) 1507 m0 = NULL; 1508 m = NULL; 1509 err = EINVAL; 1510 goto next_skip_queue; 1511 } 1512 1513 /* 1514 * This definitely indicates a bug, either in this driver or in 1515 * the backend driver. In future this should flag the bad 1516 * situation to the system controller to reboot the backed. 1517 */ 1518 if (ref == GRANT_REF_INVALID) { 1519 printf("%s: Bad rx response id %d.\n", __func__, rx->id); 1520 err = EINVAL; 1521 goto next; 1522 } 1523 1524 ret = gnttab_end_foreign_access_ref(ref); 1525 KASSERT(ret, ("Unable to end access to grant references")); 1526 1527 gnttab_release_grant_reference(&rxq->gref_head, ref); 1528 1529 next: 1530 if (m == NULL) 1531 break; 1532 1533 m->m_len = rx->status; 1534 m->m_data += rx->offset; 1535 m0->m_pkthdr.len += rx->status; 1536 1537 next_skip_queue: 1538 if (!(rx->flags & NETRXF_more_data)) 1539 break; 1540 1541 if (*cons + frags == rp) { 1542 if (net_ratelimit()) 1543 WPRINTK("Need more frags\n"); 1544 err = ENOENT; 1545 printf("%s: cons %u frags %u rp %u, not enough frags\n", 1546 __func__, *cons, frags, rp); 1547 break; 1548 } 1549 /* 1550 * Note that m can be NULL, if rx->status < 0 or if 1551 * rx->offset + rx->status > PAGE_SIZE above. 1552 */ 1553 m_prev = m; 1554 1555 rx = RING_GET_RESPONSE(&rxq->ring, *cons + frags); 1556 m = xn_get_rx_mbuf(rxq, *cons + frags); 1557 1558 /* 1559 * m_prev == NULL can happen if rx->status < 0 or if 1560 * rx->offset + * rx->status > PAGE_SIZE above. 1561 */ 1562 if (m_prev != NULL) 1563 m_prev->m_next = m; 1564 1565 /* 1566 * m0 can be NULL if rx->status < 0 or if * rx->offset + 1567 * rx->status > PAGE_SIZE above. 1568 */ 1569 if (m0 == NULL) 1570 m0 = m; 1571 m->m_next = NULL; 1572 ref = xn_get_rx_ref(rxq, *cons + frags); 1573 ref_cons = *cons + frags; 1574 frags++; 1575 } 1576 *list = m0; 1577 *cons += frags; 1578 1579 return (err); 1580 } 1581 1582 /** 1583 * \brief Count the number of fragments in an mbuf chain. 1584 * 1585 * Surprisingly, there isn't an M* macro for this. 1586 */ 1587 static inline int 1588 xn_count_frags(struct mbuf *m) 1589 { 1590 int nfrags; 1591 1592 for (nfrags = 0; m != NULL; m = m->m_next) 1593 nfrags++; 1594 1595 return (nfrags); 1596 } 1597 1598 /** 1599 * Given an mbuf chain, make sure we have enough room and then push 1600 * it onto the transmit ring. 1601 */ 1602 static int 1603 xn_assemble_tx_request(struct netfront_txq *txq, struct mbuf *m_head) 1604 { 1605 struct mbuf *m; 1606 struct netfront_info *np = txq->info; 1607 struct ifnet *ifp = np->xn_ifp; 1608 u_int nfrags; 1609 int otherend_id; 1610 1611 /** 1612 * Defragment the mbuf if necessary. 1613 */ 1614 nfrags = xn_count_frags(m_head); 1615 1616 /* 1617 * Check to see whether this request is longer than netback 1618 * can handle, and try to defrag it. 1619 */ 1620 /** 1621 * It is a bit lame, but the netback driver in Linux can't 1622 * deal with nfrags > MAX_TX_REQ_FRAGS, which is a quirk of 1623 * the Linux network stack. 1624 */ 1625 if (nfrags > np->maxfrags) { 1626 m = m_defrag(m_head, M_NOWAIT); 1627 if (!m) { 1628 /* 1629 * Defrag failed, so free the mbuf and 1630 * therefore drop the packet. 1631 */ 1632 m_freem(m_head); 1633 return (EMSGSIZE); 1634 } 1635 m_head = m; 1636 } 1637 1638 /* Determine how many fragments now exist */ 1639 nfrags = xn_count_frags(m_head); 1640 1641 /* 1642 * Check to see whether the defragmented packet has too many 1643 * segments for the Linux netback driver. 1644 */ 1645 /** 1646 * The FreeBSD TCP stack, with TSO enabled, can produce a chain 1647 * of mbufs longer than Linux can handle. Make sure we don't 1648 * pass a too-long chain over to the other side by dropping the 1649 * packet. It doesn't look like there is currently a way to 1650 * tell the TCP stack to generate a shorter chain of packets. 1651 */ 1652 if (nfrags > MAX_TX_REQ_FRAGS) { 1653 #ifdef DEBUG 1654 printf("%s: nfrags %d > MAX_TX_REQ_FRAGS %d, netback " 1655 "won't be able to handle it, dropping\n", 1656 __func__, nfrags, MAX_TX_REQ_FRAGS); 1657 #endif 1658 m_freem(m_head); 1659 return (EMSGSIZE); 1660 } 1661 1662 /* 1663 * This check should be redundant. We've already verified that we 1664 * have enough slots in the ring to handle a packet of maximum 1665 * size, and that our packet is less than the maximum size. Keep 1666 * it in here as an assert for now just to make certain that 1667 * chain_cnt is accurate. 1668 */ 1669 KASSERT((txq->mbufs_cnt + nfrags) <= NET_TX_RING_SIZE, 1670 ("%s: chain_cnt (%d) + nfrags (%d) > NET_TX_RING_SIZE " 1671 "(%d)!", __func__, (int) txq->mbufs_cnt, 1672 (int) nfrags, (int) NET_TX_RING_SIZE)); 1673 1674 /* 1675 * Start packing the mbufs in this chain into 1676 * the fragment pointers. Stop when we run out 1677 * of fragments or hit the end of the mbuf chain. 1678 */ 1679 m = m_head; 1680 otherend_id = xenbus_get_otherend_id(np->xbdev); 1681 for (m = m_head; m; m = m->m_next) { 1682 netif_tx_request_t *tx; 1683 uintptr_t id; 1684 grant_ref_t ref; 1685 u_long mfn; /* XXX Wrong type? */ 1686 1687 tx = RING_GET_REQUEST(&txq->ring, txq->ring.req_prod_pvt); 1688 id = get_id_from_freelist(txq->mbufs); 1689 if (id == 0) 1690 panic("%s: was allocated the freelist head!\n", 1691 __func__); 1692 txq->mbufs_cnt++; 1693 if (txq->mbufs_cnt > NET_TX_RING_SIZE) 1694 panic("%s: tx_chain_cnt must be <= NET_TX_RING_SIZE\n", 1695 __func__); 1696 txq->mbufs[id] = m; 1697 tx->id = id; 1698 ref = gnttab_claim_grant_reference(&txq->gref_head); 1699 KASSERT((short)ref >= 0, ("Negative ref")); 1700 mfn = virt_to_mfn(mtod(m, vm_offset_t)); 1701 gnttab_grant_foreign_access_ref(ref, otherend_id, 1702 mfn, GNTMAP_readonly); 1703 tx->gref = txq->grant_ref[id] = ref; 1704 tx->offset = mtod(m, vm_offset_t) & (PAGE_SIZE - 1); 1705 tx->flags = 0; 1706 if (m == m_head) { 1707 /* 1708 * The first fragment has the entire packet 1709 * size, subsequent fragments have just the 1710 * fragment size. The backend works out the 1711 * true size of the first fragment by 1712 * subtracting the sizes of the other 1713 * fragments. 1714 */ 1715 tx->size = m->m_pkthdr.len; 1716 1717 /* 1718 * The first fragment contains the checksum flags 1719 * and is optionally followed by extra data for 1720 * TSO etc. 1721 */ 1722 /** 1723 * CSUM_TSO requires checksum offloading. 1724 * Some versions of FreeBSD fail to 1725 * set CSUM_TCP in the CSUM_TSO case, 1726 * so we have to test for CSUM_TSO 1727 * explicitly. 1728 */ 1729 if (m->m_pkthdr.csum_flags 1730 & (CSUM_DELAY_DATA | CSUM_TSO)) { 1731 tx->flags |= (NETTXF_csum_blank 1732 | NETTXF_data_validated); 1733 } 1734 if (m->m_pkthdr.csum_flags & CSUM_TSO) { 1735 struct netif_extra_info *gso = 1736 (struct netif_extra_info *) 1737 RING_GET_REQUEST(&txq->ring, 1738 ++txq->ring.req_prod_pvt); 1739 1740 tx->flags |= NETTXF_extra_info; 1741 1742 gso->u.gso.size = m->m_pkthdr.tso_segsz; 1743 gso->u.gso.type = 1744 XEN_NETIF_GSO_TYPE_TCPV4; 1745 gso->u.gso.pad = 0; 1746 gso->u.gso.features = 0; 1747 1748 gso->type = XEN_NETIF_EXTRA_TYPE_GSO; 1749 gso->flags = 0; 1750 } 1751 } else { 1752 tx->size = m->m_len; 1753 } 1754 if (m->m_next) 1755 tx->flags |= NETTXF_more_data; 1756 1757 txq->ring.req_prod_pvt++; 1758 } 1759 BPF_MTAP(ifp, m_head); 1760 1761 xn_txeof(txq); 1762 1763 txq->stats.tx_bytes += m_head->m_pkthdr.len; 1764 txq->stats.tx_packets++; 1765 1766 return (0); 1767 } 1768 1769 /* equivalent of network_open() in Linux */ 1770 static void 1771 xn_ifinit_locked(struct netfront_info *np) 1772 { 1773 struct ifnet *ifp; 1774 int i; 1775 struct netfront_rxq *rxq; 1776 1777 XN_LOCK_ASSERT(np); 1778 1779 ifp = np->xn_ifp; 1780 1781 if (ifp->if_drv_flags & IFF_DRV_RUNNING) 1782 return; 1783 1784 xn_stop(np); 1785 1786 for (i = 0; i < np->num_queues; i++) { 1787 rxq = &np->rxq[i]; 1788 xn_alloc_rx_buffers(rxq); 1789 rxq->ring.sring->rsp_event = rxq->ring.rsp_cons + 1; 1790 } 1791 1792 ifp->if_drv_flags |= IFF_DRV_RUNNING; 1793 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; 1794 if_link_state_change(ifp, LINK_STATE_UP); 1795 } 1796 1797 static void 1798 xn_ifinit(void *xsc) 1799 { 1800 struct netfront_info *sc = xsc; 1801 1802 XN_LOCK(sc); 1803 xn_ifinit_locked(sc); 1804 XN_UNLOCK(sc); 1805 } 1806 1807 static int 1808 xn_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) 1809 { 1810 struct netfront_info *sc = ifp->if_softc; 1811 struct ifreq *ifr = (struct ifreq *) data; 1812 #ifdef INET 1813 struct ifaddr *ifa = (struct ifaddr *)data; 1814 #endif 1815 1816 int mask, error = 0; 1817 switch(cmd) { 1818 case SIOCSIFADDR: 1819 #ifdef INET 1820 XN_LOCK(sc); 1821 if (ifa->ifa_addr->sa_family == AF_INET) { 1822 ifp->if_flags |= IFF_UP; 1823 if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) 1824 xn_ifinit_locked(sc); 1825 arp_ifinit(ifp, ifa); 1826 XN_UNLOCK(sc); 1827 } else { 1828 XN_UNLOCK(sc); 1829 #endif 1830 error = ether_ioctl(ifp, cmd, data); 1831 #ifdef INET 1832 } 1833 #endif 1834 break; 1835 case SIOCSIFMTU: 1836 ifp->if_mtu = ifr->ifr_mtu; 1837 ifp->if_drv_flags &= ~IFF_DRV_RUNNING; 1838 xn_ifinit(sc); 1839 break; 1840 case SIOCSIFFLAGS: 1841 XN_LOCK(sc); 1842 if (ifp->if_flags & IFF_UP) { 1843 /* 1844 * If only the state of the PROMISC flag changed, 1845 * then just use the 'set promisc mode' command 1846 * instead of reinitializing the entire NIC. Doing 1847 * a full re-init means reloading the firmware and 1848 * waiting for it to start up, which may take a 1849 * second or two. 1850 */ 1851 xn_ifinit_locked(sc); 1852 } else { 1853 if (ifp->if_drv_flags & IFF_DRV_RUNNING) { 1854 xn_stop(sc); 1855 } 1856 } 1857 sc->xn_if_flags = ifp->if_flags; 1858 XN_UNLOCK(sc); 1859 break; 1860 case SIOCSIFCAP: 1861 mask = ifr->ifr_reqcap ^ ifp->if_capenable; 1862 if (mask & IFCAP_TXCSUM) { 1863 if (IFCAP_TXCSUM & ifp->if_capenable) { 1864 ifp->if_capenable &= ~(IFCAP_TXCSUM|IFCAP_TSO4); 1865 ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP 1866 | CSUM_IP | CSUM_TSO); 1867 } else { 1868 ifp->if_capenable |= IFCAP_TXCSUM; 1869 ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP 1870 | CSUM_IP); 1871 } 1872 } 1873 if (mask & IFCAP_RXCSUM) { 1874 ifp->if_capenable ^= IFCAP_RXCSUM; 1875 } 1876 if (mask & IFCAP_TSO4) { 1877 if (IFCAP_TSO4 & ifp->if_capenable) { 1878 ifp->if_capenable &= ~IFCAP_TSO4; 1879 ifp->if_hwassist &= ~CSUM_TSO; 1880 } else if (IFCAP_TXCSUM & ifp->if_capenable) { 1881 ifp->if_capenable |= IFCAP_TSO4; 1882 ifp->if_hwassist |= CSUM_TSO; 1883 } else { 1884 IPRINTK("Xen requires tx checksum offload" 1885 " be enabled to use TSO\n"); 1886 error = EINVAL; 1887 } 1888 } 1889 if (mask & IFCAP_LRO) { 1890 ifp->if_capenable ^= IFCAP_LRO; 1891 1892 } 1893 break; 1894 case SIOCADDMULTI: 1895 case SIOCDELMULTI: 1896 break; 1897 case SIOCSIFMEDIA: 1898 case SIOCGIFMEDIA: 1899 error = ifmedia_ioctl(ifp, ifr, &sc->sc_media, cmd); 1900 break; 1901 default: 1902 error = ether_ioctl(ifp, cmd, data); 1903 } 1904 1905 return (error); 1906 } 1907 1908 static void 1909 xn_stop(struct netfront_info *sc) 1910 { 1911 struct ifnet *ifp; 1912 1913 XN_LOCK_ASSERT(sc); 1914 1915 ifp = sc->xn_ifp; 1916 1917 ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); 1918 if_link_state_change(ifp, LINK_STATE_DOWN); 1919 } 1920 1921 static void 1922 xn_rebuild_rx_bufs(struct netfront_rxq *rxq) 1923 { 1924 int requeue_idx, i; 1925 grant_ref_t ref; 1926 netif_rx_request_t *req; 1927 1928 for (requeue_idx = 0, i = 0; i < NET_RX_RING_SIZE; i++) { 1929 struct mbuf *m; 1930 u_long pfn; 1931 1932 if (rxq->mbufs[i] == NULL) 1933 continue; 1934 1935 m = rxq->mbufs[requeue_idx] = xn_get_rx_mbuf(rxq, i); 1936 ref = rxq->grant_ref[requeue_idx] = xn_get_rx_ref(rxq, i); 1937 1938 req = RING_GET_REQUEST(&rxq->ring, requeue_idx); 1939 pfn = vtophys(mtod(m, vm_offset_t)) >> PAGE_SHIFT; 1940 1941 gnttab_grant_foreign_access_ref(ref, 1942 xenbus_get_otherend_id(rxq->info->xbdev), 1943 pfn, 0); 1944 1945 req->gref = ref; 1946 req->id = requeue_idx; 1947 1948 requeue_idx++; 1949 } 1950 1951 rxq->ring.req_prod_pvt = requeue_idx; 1952 } 1953 1954 /* START of Xenolinux helper functions adapted to FreeBSD */ 1955 int 1956 xn_connect(struct netfront_info *np) 1957 { 1958 int i, error; 1959 u_int feature_rx_copy; 1960 struct netfront_rxq *rxq; 1961 struct netfront_txq *txq; 1962 1963 error = xs_scanf(XST_NIL, xenbus_get_otherend_path(np->xbdev), 1964 "feature-rx-copy", NULL, "%u", &feature_rx_copy); 1965 if (error != 0) 1966 feature_rx_copy = 0; 1967 1968 /* We only support rx copy. */ 1969 if (!feature_rx_copy) 1970 return (EPROTONOSUPPORT); 1971 1972 /* Recovery procedure: */ 1973 error = talk_to_backend(np->xbdev, np); 1974 if (error != 0) 1975 return (error); 1976 1977 /* Step 1: Reinitialise variables. */ 1978 xn_query_features(np); 1979 xn_configure_features(np); 1980 1981 /* Step 2: Release TX buffer */ 1982 for (i = 0; i < np->num_queues; i++) { 1983 txq = &np->txq[i]; 1984 xn_release_tx_bufs(txq); 1985 } 1986 1987 /* Step 3: Rebuild the RX buffer freelist and the RX ring itself. */ 1988 for (i = 0; i < np->num_queues; i++) { 1989 rxq = &np->rxq[i]; 1990 xn_rebuild_rx_bufs(rxq); 1991 } 1992 1993 /* Step 4: All public and private state should now be sane. Get 1994 * ready to start sending and receiving packets and give the driver 1995 * domain a kick because we've probably just requeued some 1996 * packets. 1997 */ 1998 netfront_carrier_on(np); 1999 for (i = 0; i < np->num_queues; i++) { 2000 txq = &np->txq[i]; 2001 xen_intr_signal(txq->xen_intr_handle); 2002 XN_TX_LOCK(txq); 2003 xn_txeof(txq); 2004 XN_TX_UNLOCK(txq); 2005 xn_alloc_rx_buffers(rxq); 2006 } 2007 2008 return (0); 2009 } 2010 2011 static void 2012 xn_query_features(struct netfront_info *np) 2013 { 2014 int val; 2015 2016 device_printf(np->xbdev, "backend features:"); 2017 2018 if (xs_scanf(XST_NIL, xenbus_get_otherend_path(np->xbdev), 2019 "feature-sg", NULL, "%d", &val) < 0) 2020 val = 0; 2021 2022 np->maxfrags = 1; 2023 if (val) { 2024 np->maxfrags = MAX_TX_REQ_FRAGS; 2025 printf(" feature-sg"); 2026 } 2027 2028 if (xs_scanf(XST_NIL, xenbus_get_otherend_path(np->xbdev), 2029 "feature-gso-tcpv4", NULL, "%d", &val) < 0) 2030 val = 0; 2031 2032 np->xn_ifp->if_capabilities &= ~(IFCAP_TSO4|IFCAP_LRO); 2033 if (val) { 2034 np->xn_ifp->if_capabilities |= IFCAP_TSO4|IFCAP_LRO; 2035 printf(" feature-gso-tcp4"); 2036 } 2037 2038 printf("\n"); 2039 } 2040 2041 static int 2042 xn_configure_features(struct netfront_info *np) 2043 { 2044 int err, cap_enabled; 2045 #if (defined(INET) || defined(INET6)) 2046 int i; 2047 #endif 2048 2049 err = 0; 2050 2051 if (np->xn_resume && 2052 ((np->xn_ifp->if_capenable & np->xn_ifp->if_capabilities) 2053 == np->xn_ifp->if_capenable)) { 2054 /* Current options are available, no need to do anything. */ 2055 return (0); 2056 } 2057 2058 /* Try to preserve as many options as possible. */ 2059 if (np->xn_resume) 2060 cap_enabled = np->xn_ifp->if_capenable; 2061 else 2062 cap_enabled = UINT_MAX; 2063 2064 #if (defined(INET) || defined(INET6)) 2065 for (i = 0; i < np->num_queues; i++) 2066 if ((np->xn_ifp->if_capenable & IFCAP_LRO) == 2067 (cap_enabled & IFCAP_LRO)) 2068 tcp_lro_free(&np->rxq[i].lro); 2069 #endif 2070 np->xn_ifp->if_capenable = 2071 np->xn_ifp->if_capabilities & ~(IFCAP_LRO|IFCAP_TSO4) & cap_enabled; 2072 np->xn_ifp->if_hwassist &= ~CSUM_TSO; 2073 #if (defined(INET) || defined(INET6)) 2074 for (i = 0; i < np->num_queues; i++) { 2075 if (xn_enable_lro && (np->xn_ifp->if_capabilities & IFCAP_LRO) == 2076 (cap_enabled & IFCAP_LRO)) { 2077 err = tcp_lro_init(&np->rxq[i].lro); 2078 if (err != 0) { 2079 device_printf(np->xbdev, "LRO initialization failed\n"); 2080 } else { 2081 np->rxq[i].lro.ifp = np->xn_ifp; 2082 np->xn_ifp->if_capenable |= IFCAP_LRO; 2083 } 2084 } 2085 } 2086 if ((np->xn_ifp->if_capabilities & IFCAP_TSO4) == 2087 (cap_enabled & IFCAP_TSO4)) { 2088 np->xn_ifp->if_capenable |= IFCAP_TSO4; 2089 np->xn_ifp->if_hwassist |= CSUM_TSO; 2090 } 2091 #endif 2092 return (err); 2093 } 2094 2095 static int 2096 xn_txq_mq_start_locked(struct netfront_txq *txq, struct mbuf *m) 2097 { 2098 struct netfront_info *np; 2099 struct ifnet *ifp; 2100 struct buf_ring *br; 2101 int error, notify; 2102 2103 np = txq->info; 2104 br = txq->br; 2105 ifp = np->xn_ifp; 2106 error = 0; 2107 2108 XN_TX_LOCK_ASSERT(txq); 2109 2110 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || 2111 !netfront_carrier_ok(np)) { 2112 if (m != NULL) 2113 error = drbr_enqueue(ifp, br, m); 2114 return (error); 2115 } 2116 2117 if (m != NULL) { 2118 error = drbr_enqueue(ifp, br, m); 2119 if (error != 0) 2120 return (error); 2121 } 2122 2123 while ((m = drbr_peek(ifp, br)) != NULL) { 2124 if (!xn_tx_slot_available(txq)) { 2125 drbr_putback(ifp, br, m); 2126 break; 2127 } 2128 2129 error = xn_assemble_tx_request(txq, m); 2130 /* xn_assemble_tx_request always consumes the mbuf*/ 2131 if (error != 0) { 2132 drbr_advance(ifp, br); 2133 break; 2134 } 2135 2136 RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&txq->ring, notify); 2137 if (notify) 2138 xen_intr_signal(txq->xen_intr_handle); 2139 2140 drbr_advance(ifp, br); 2141 } 2142 2143 if (RING_FULL(&txq->ring)) 2144 txq->full = true; 2145 2146 return (0); 2147 } 2148 2149 static int 2150 xn_txq_mq_start(struct ifnet *ifp, struct mbuf *m) 2151 { 2152 struct netfront_info *np; 2153 struct netfront_txq *txq; 2154 int i, npairs, error; 2155 2156 np = ifp->if_softc; 2157 npairs = np->num_queues; 2158 2159 /* check if flowid is set */ 2160 if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) 2161 i = m->m_pkthdr.flowid % npairs; 2162 else 2163 i = curcpu % npairs; 2164 2165 txq = &np->txq[i]; 2166 2167 if (XN_TX_TRYLOCK(txq) != 0) { 2168 error = xn_txq_mq_start_locked(txq, m); 2169 XN_TX_UNLOCK(txq); 2170 } else { 2171 error = drbr_enqueue(ifp, txq->br, m); 2172 taskqueue_enqueue(txq->tq, &txq->defrtask); 2173 } 2174 2175 return (error); 2176 } 2177 2178 static void 2179 xn_qflush(struct ifnet *ifp) 2180 { 2181 struct netfront_info *np; 2182 struct netfront_txq *txq; 2183 struct mbuf *m; 2184 int i; 2185 2186 np = ifp->if_softc; 2187 2188 for (i = 0; i < np->num_queues; i++) { 2189 txq = &np->txq[i]; 2190 2191 XN_TX_LOCK(txq); 2192 while ((m = buf_ring_dequeue_sc(txq->br)) != NULL) 2193 m_freem(m); 2194 XN_TX_UNLOCK(txq); 2195 } 2196 2197 if_qflush(ifp); 2198 } 2199 2200 /** 2201 * Create a network device. 2202 * @param dev Newbus device representing this virtual NIC. 2203 */ 2204 int 2205 create_netdev(device_t dev) 2206 { 2207 struct netfront_info *np; 2208 int err; 2209 struct ifnet *ifp; 2210 2211 np = device_get_softc(dev); 2212 2213 np->xbdev = dev; 2214 2215 mtx_init(&np->sc_lock, "xnsc", "netfront softc lock", MTX_DEF); 2216 2217 ifmedia_init(&np->sc_media, 0, xn_ifmedia_upd, xn_ifmedia_sts); 2218 ifmedia_add(&np->sc_media, IFM_ETHER|IFM_MANUAL, 0, NULL); 2219 ifmedia_set(&np->sc_media, IFM_ETHER|IFM_MANUAL); 2220 2221 np->rx_min_target = RX_MIN_TARGET; 2222 np->rx_max_target = RX_MAX_TARGET; 2223 2224 err = xen_net_read_mac(dev, np->mac); 2225 if (err != 0) 2226 goto error; 2227 2228 /* Set up ifnet structure */ 2229 ifp = np->xn_ifp = if_alloc(IFT_ETHER); 2230 ifp->if_softc = np; 2231 if_initname(ifp, "xn", device_get_unit(dev)); 2232 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; 2233 ifp->if_ioctl = xn_ioctl; 2234 2235 ifp->if_transmit = xn_txq_mq_start; 2236 ifp->if_qflush = xn_qflush; 2237 2238 ifp->if_init = xn_ifinit; 2239 2240 ifp->if_hwassist = XN_CSUM_FEATURES; 2241 ifp->if_capabilities = IFCAP_HWCSUM; 2242 ifp->if_hw_tsomax = 65536 - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN); 2243 ifp->if_hw_tsomaxsegcount = MAX_TX_REQ_FRAGS; 2244 ifp->if_hw_tsomaxsegsize = PAGE_SIZE; 2245 2246 ether_ifattach(ifp, np->mac); 2247 netfront_carrier_off(np); 2248 2249 return (0); 2250 2251 error: 2252 KASSERT(err != 0, ("Error path with no error code specified")); 2253 return (err); 2254 } 2255 2256 static int 2257 netfront_detach(device_t dev) 2258 { 2259 struct netfront_info *info = device_get_softc(dev); 2260 2261 DPRINTK("%s\n", xenbus_get_node(dev)); 2262 2263 netif_free(info); 2264 2265 return 0; 2266 } 2267 2268 static void 2269 netif_free(struct netfront_info *np) 2270 { 2271 2272 XN_LOCK(np); 2273 xn_stop(np); 2274 XN_UNLOCK(np); 2275 netif_disconnect_backend(np); 2276 free(np->rxq, M_DEVBUF); 2277 free(np->txq, M_DEVBUF); 2278 ether_ifdetach(np->xn_ifp); 2279 if_free(np->xn_ifp); 2280 np->xn_ifp = NULL; 2281 ifmedia_removeall(&np->sc_media); 2282 } 2283 2284 static void 2285 netif_disconnect_backend(struct netfront_info *np) 2286 { 2287 u_int i; 2288 2289 for (i = 0; i < np->num_queues; i++) { 2290 XN_RX_LOCK(&np->rxq[i]); 2291 XN_TX_LOCK(&np->txq[i]); 2292 } 2293 netfront_carrier_off(np); 2294 for (i = 0; i < np->num_queues; i++) { 2295 XN_RX_UNLOCK(&np->rxq[i]); 2296 XN_TX_UNLOCK(&np->txq[i]); 2297 } 2298 2299 for (i = 0; i < np->num_queues; i++) { 2300 disconnect_rxq(&np->rxq[i]); 2301 disconnect_txq(&np->txq[i]); 2302 } 2303 } 2304 2305 static int 2306 xn_ifmedia_upd(struct ifnet *ifp) 2307 { 2308 2309 return (0); 2310 } 2311 2312 static void 2313 xn_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr) 2314 { 2315 2316 ifmr->ifm_status = IFM_AVALID|IFM_ACTIVE; 2317 ifmr->ifm_active = IFM_ETHER|IFM_MANUAL; 2318 } 2319 2320 /* ** Driver registration ** */ 2321 static device_method_t netfront_methods[] = { 2322 /* Device interface */ 2323 DEVMETHOD(device_probe, netfront_probe), 2324 DEVMETHOD(device_attach, netfront_attach), 2325 DEVMETHOD(device_detach, netfront_detach), 2326 DEVMETHOD(device_shutdown, bus_generic_shutdown), 2327 DEVMETHOD(device_suspend, netfront_suspend), 2328 DEVMETHOD(device_resume, netfront_resume), 2329 2330 /* Xenbus interface */ 2331 DEVMETHOD(xenbus_otherend_changed, netfront_backend_changed), 2332 2333 DEVMETHOD_END 2334 }; 2335 2336 static driver_t netfront_driver = { 2337 "xn", 2338 netfront_methods, 2339 sizeof(struct netfront_info), 2340 }; 2341 devclass_t netfront_devclass; 2342 2343 DRIVER_MODULE(xe, xenbusb_front, netfront_driver, netfront_devclass, NULL, 2344 NULL); 2345