1 /*- 2 * Copyright (c) 2004-2006 Kip Macy 3 * Copyright (c) 2015 Wei Liu <wei.liu2@citrix.com> 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28 #include <sys/cdefs.h> 29 __FBSDID("$FreeBSD$"); 30 31 #include "opt_inet.h" 32 #include "opt_inet6.h" 33 34 #include <sys/param.h> 35 #include <sys/sockio.h> 36 #include <sys/limits.h> 37 #include <sys/mbuf.h> 38 #include <sys/malloc.h> 39 #include <sys/module.h> 40 #include <sys/kernel.h> 41 #include <sys/socket.h> 42 #include <sys/sysctl.h> 43 #include <sys/taskqueue.h> 44 45 #include <net/if.h> 46 #include <net/if_var.h> 47 #include <net/if_arp.h> 48 #include <net/ethernet.h> 49 #include <net/if_media.h> 50 #include <net/bpf.h> 51 #include <net/if_types.h> 52 53 #include <netinet/in.h> 54 #include <netinet/ip.h> 55 #include <netinet/if_ether.h> 56 #include <netinet/tcp.h> 57 #include <netinet/tcp_lro.h> 58 59 #include <vm/vm.h> 60 #include <vm/pmap.h> 61 62 #include <sys/bus.h> 63 64 #include <xen/xen-os.h> 65 #include <xen/hypervisor.h> 66 #include <xen/xen_intr.h> 67 #include <xen/gnttab.h> 68 #include <xen/interface/memory.h> 69 #include <xen/interface/io/netif.h> 70 #include <xen/xenbus/xenbusvar.h> 71 72 #include "xenbus_if.h" 73 74 /* Features supported by all backends. TSO and LRO can be negotiated */ 75 #define XN_CSUM_FEATURES (CSUM_TCP | CSUM_UDP) 76 77 #define NET_TX_RING_SIZE __RING_SIZE((netif_tx_sring_t *)0, PAGE_SIZE) 78 #define NET_RX_RING_SIZE __RING_SIZE((netif_rx_sring_t *)0, PAGE_SIZE) 79 80 /* 81 * Should the driver do LRO on the RX end 82 * this can be toggled on the fly, but the 83 * interface must be reset (down/up) for it 84 * to take effect. 85 */ 86 static int xn_enable_lro = 1; 87 TUNABLE_INT("hw.xn.enable_lro", &xn_enable_lro); 88 89 /* 90 * Number of pairs of queues. 91 */ 92 static unsigned long xn_num_queues = 4; 93 TUNABLE_ULONG("hw.xn.num_queues", &xn_num_queues); 94 95 /** 96 * \brief The maximum allowed data fragments in a single transmit 97 * request. 98 * 99 * This limit is imposed by the backend driver. We assume here that 100 * we are dealing with a Linux driver domain and have set our limit 101 * to mirror the Linux MAX_SKB_FRAGS constant. 102 */ 103 #define MAX_TX_REQ_FRAGS (65536 / PAGE_SIZE + 2) 104 105 #define RX_COPY_THRESHOLD 256 106 107 #define net_ratelimit() 0 108 109 struct netfront_rxq; 110 struct netfront_txq; 111 struct netfront_info; 112 struct netfront_rx_info; 113 114 static void xn_txeof(struct netfront_txq *); 115 static void xn_rxeof(struct netfront_rxq *); 116 static void xn_alloc_rx_buffers(struct netfront_rxq *); 117 118 static void xn_release_rx_bufs(struct netfront_rxq *); 119 static void xn_release_tx_bufs(struct netfront_txq *); 120 121 static void xn_rxq_intr(void *); 122 static void xn_txq_intr(void *); 123 static int xn_intr(void *); 124 static inline int xn_count_frags(struct mbuf *m); 125 static int xn_assemble_tx_request(struct netfront_txq *, struct mbuf *); 126 static int xn_ioctl(struct ifnet *, u_long, caddr_t); 127 static void xn_ifinit_locked(struct netfront_info *); 128 static void xn_ifinit(void *); 129 static void xn_stop(struct netfront_info *); 130 static void xn_query_features(struct netfront_info *np); 131 static int xn_configure_features(struct netfront_info *np); 132 static void netif_free(struct netfront_info *info); 133 static int netfront_detach(device_t dev); 134 135 static int xn_txq_mq_start_locked(struct netfront_txq *, struct mbuf *); 136 static int xn_txq_mq_start(struct ifnet *, struct mbuf *); 137 138 static int talk_to_backend(device_t dev, struct netfront_info *info); 139 static int create_netdev(device_t dev); 140 static void netif_disconnect_backend(struct netfront_info *info); 141 static int setup_device(device_t dev, struct netfront_info *info, 142 unsigned long); 143 static int xn_ifmedia_upd(struct ifnet *ifp); 144 static void xn_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr); 145 146 int xn_connect(struct netfront_info *); 147 148 static int xn_get_responses(struct netfront_rxq *, 149 struct netfront_rx_info *, RING_IDX, RING_IDX *, 150 struct mbuf **); 151 152 #define virt_to_mfn(x) (vtophys(x) >> PAGE_SHIFT) 153 154 #define INVALID_P2M_ENTRY (~0UL) 155 156 struct xn_rx_stats 157 { 158 u_long rx_packets; /* total packets received */ 159 u_long rx_bytes; /* total bytes received */ 160 u_long rx_errors; /* bad packets received */ 161 }; 162 163 struct xn_tx_stats 164 { 165 u_long tx_packets; /* total packets transmitted */ 166 u_long tx_bytes; /* total bytes transmitted */ 167 u_long tx_errors; /* packet transmit problems */ 168 }; 169 170 #define XN_QUEUE_NAME_LEN 8 /* xn{t,r}x_%u, allow for two digits */ 171 struct netfront_rxq { 172 struct netfront_info *info; 173 u_int id; 174 char name[XN_QUEUE_NAME_LEN]; 175 struct mtx lock; 176 177 int ring_ref; 178 netif_rx_front_ring_t ring; 179 xen_intr_handle_t xen_intr_handle; 180 181 grant_ref_t gref_head; 182 grant_ref_t grant_ref[NET_TX_RING_SIZE + 1]; 183 184 struct mbuf *mbufs[NET_RX_RING_SIZE + 1]; 185 struct mbufq batch; /* batch queue */ 186 int target; 187 188 xen_pfn_t pfn_array[NET_RX_RING_SIZE]; 189 190 struct lro_ctrl lro; 191 192 struct taskqueue *tq; 193 struct task intrtask; 194 195 struct xn_rx_stats stats; 196 }; 197 198 struct netfront_txq { 199 struct netfront_info *info; 200 u_int id; 201 char name[XN_QUEUE_NAME_LEN]; 202 struct mtx lock; 203 204 int ring_ref; 205 netif_tx_front_ring_t ring; 206 xen_intr_handle_t xen_intr_handle; 207 208 grant_ref_t gref_head; 209 grant_ref_t grant_ref[NET_TX_RING_SIZE + 1]; 210 211 struct mbuf *mbufs[NET_TX_RING_SIZE + 1]; 212 int mbufs_cnt; 213 struct buf_ring *br; 214 215 struct taskqueue *tq; 216 struct task intrtask; 217 struct task defrtask; 218 219 bool full; 220 221 struct xn_tx_stats stats; 222 }; 223 224 struct netfront_info { 225 struct ifnet *xn_ifp; 226 227 struct mtx sc_lock; 228 229 u_int num_queues; 230 struct netfront_rxq *rxq; 231 struct netfront_txq *txq; 232 233 u_int carrier; 234 u_int maxfrags; 235 236 /* Receive-ring batched refills. */ 237 #define RX_MIN_TARGET 32 238 #define RX_MAX_TARGET NET_RX_RING_SIZE 239 int rx_min_target; 240 int rx_max_target; 241 242 device_t xbdev; 243 uint8_t mac[ETHER_ADDR_LEN]; 244 245 int xn_if_flags; 246 247 struct ifmedia sc_media; 248 249 bool xn_resume; 250 }; 251 252 struct netfront_rx_info { 253 struct netif_rx_response rx; 254 struct netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1]; 255 }; 256 257 #define XN_RX_LOCK(_q) mtx_lock(&(_q)->lock) 258 #define XN_RX_UNLOCK(_q) mtx_unlock(&(_q)->lock) 259 260 #define XN_TX_LOCK(_q) mtx_lock(&(_q)->lock) 261 #define XN_TX_TRYLOCK(_q) mtx_trylock(&(_q)->lock) 262 #define XN_TX_UNLOCK(_q) mtx_unlock(&(_q)->lock) 263 264 #define XN_LOCK(_sc) mtx_lock(&(_sc)->sc_lock); 265 #define XN_UNLOCK(_sc) mtx_unlock(&(_sc)->sc_lock); 266 267 #define XN_LOCK_ASSERT(_sc) mtx_assert(&(_sc)->sc_lock, MA_OWNED); 268 #define XN_RX_LOCK_ASSERT(_q) mtx_assert(&(_q)->lock, MA_OWNED); 269 #define XN_TX_LOCK_ASSERT(_q) mtx_assert(&(_q)->lock, MA_OWNED); 270 271 #define netfront_carrier_on(netif) ((netif)->carrier = 1) 272 #define netfront_carrier_off(netif) ((netif)->carrier = 0) 273 #define netfront_carrier_ok(netif) ((netif)->carrier) 274 275 /* Access macros for acquiring freeing slots in xn_free_{tx,rx}_idxs[]. */ 276 277 static inline void 278 add_id_to_freelist(struct mbuf **list, uintptr_t id) 279 { 280 281 KASSERT(id != 0, 282 ("%s: the head item (0) must always be free.", __func__)); 283 list[id] = list[0]; 284 list[0] = (struct mbuf *)id; 285 } 286 287 static inline unsigned short 288 get_id_from_freelist(struct mbuf **list) 289 { 290 uintptr_t id; 291 292 id = (uintptr_t)list[0]; 293 KASSERT(id != 0, 294 ("%s: the head item (0) must always remain free.", __func__)); 295 list[0] = list[id]; 296 return (id); 297 } 298 299 static inline int 300 xn_rxidx(RING_IDX idx) 301 { 302 303 return idx & (NET_RX_RING_SIZE - 1); 304 } 305 306 static inline struct mbuf * 307 xn_get_rx_mbuf(struct netfront_rxq *rxq, RING_IDX ri) 308 { 309 int i; 310 struct mbuf *m; 311 312 i = xn_rxidx(ri); 313 m = rxq->mbufs[i]; 314 rxq->mbufs[i] = NULL; 315 return (m); 316 } 317 318 static inline grant_ref_t 319 xn_get_rx_ref(struct netfront_rxq *rxq, RING_IDX ri) 320 { 321 int i = xn_rxidx(ri); 322 grant_ref_t ref = rxq->grant_ref[i]; 323 324 KASSERT(ref != GRANT_REF_INVALID, ("Invalid grant reference!\n")); 325 rxq->grant_ref[i] = GRANT_REF_INVALID; 326 return (ref); 327 } 328 329 #define IPRINTK(fmt, args...) \ 330 printf("[XEN] " fmt, ##args) 331 #ifdef INVARIANTS 332 #define WPRINTK(fmt, args...) \ 333 printf("[XEN] " fmt, ##args) 334 #else 335 #define WPRINTK(fmt, args...) 336 #endif 337 #ifdef DEBUG 338 #define DPRINTK(fmt, args...) \ 339 printf("[XEN] %s: " fmt, __func__, ##args) 340 #else 341 #define DPRINTK(fmt, args...) 342 #endif 343 344 /** 345 * Read the 'mac' node at the given device's node in the store, and parse that 346 * as colon-separated octets, placing result the given mac array. mac must be 347 * a preallocated array of length ETH_ALEN (as declared in linux/if_ether.h). 348 * Return 0 on success, or errno on error. 349 */ 350 static int 351 xen_net_read_mac(device_t dev, uint8_t mac[]) 352 { 353 int error, i; 354 char *s, *e, *macstr; 355 const char *path; 356 357 path = xenbus_get_node(dev); 358 error = xs_read(XST_NIL, path, "mac", NULL, (void **) &macstr); 359 if (error == ENOENT) { 360 /* 361 * Deal with missing mac XenStore nodes on devices with 362 * HVM emulation (the 'ioemu' configuration attribute) 363 * enabled. 364 * 365 * The HVM emulator may execute in a stub device model 366 * domain which lacks the permission, only given to Dom0, 367 * to update the guest's XenStore tree. For this reason, 368 * the HVM emulator doesn't even attempt to write the 369 * front-side mac node, even when operating in Dom0. 370 * However, there should always be a mac listed in the 371 * backend tree. Fallback to this version if our query 372 * of the front side XenStore location doesn't find 373 * anything. 374 */ 375 path = xenbus_get_otherend_path(dev); 376 error = xs_read(XST_NIL, path, "mac", NULL, (void **) &macstr); 377 } 378 if (error != 0) { 379 xenbus_dev_fatal(dev, error, "parsing %s/mac", path); 380 return (error); 381 } 382 383 s = macstr; 384 for (i = 0; i < ETHER_ADDR_LEN; i++) { 385 mac[i] = strtoul(s, &e, 16); 386 if (s == e || (e[0] != ':' && e[0] != 0)) { 387 free(macstr, M_XENBUS); 388 return (ENOENT); 389 } 390 s = &e[1]; 391 } 392 free(macstr, M_XENBUS); 393 return (0); 394 } 395 396 /** 397 * Entry point to this code when a new device is created. Allocate the basic 398 * structures and the ring buffers for communication with the backend, and 399 * inform the backend of the appropriate details for those. Switch to 400 * Connected state. 401 */ 402 static int 403 netfront_probe(device_t dev) 404 { 405 406 if (xen_hvm_domain() && xen_disable_pv_nics != 0) 407 return (ENXIO); 408 409 if (!strcmp(xenbus_get_type(dev), "vif")) { 410 device_set_desc(dev, "Virtual Network Interface"); 411 return (0); 412 } 413 414 return (ENXIO); 415 } 416 417 static int 418 netfront_attach(device_t dev) 419 { 420 int err; 421 422 err = create_netdev(dev); 423 if (err != 0) { 424 xenbus_dev_fatal(dev, err, "creating netdev"); 425 return (err); 426 } 427 428 SYSCTL_ADD_INT(device_get_sysctl_ctx(dev), 429 SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), 430 OID_AUTO, "enable_lro", CTLFLAG_RW, 431 &xn_enable_lro, 0, "Large Receive Offload"); 432 433 SYSCTL_ADD_ULONG(device_get_sysctl_ctx(dev), 434 SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), 435 OID_AUTO, "num_queues", CTLFLAG_RD, 436 &xn_num_queues, "Number of pairs of queues"); 437 438 return (0); 439 } 440 441 static int 442 netfront_suspend(device_t dev) 443 { 444 struct netfront_info *np = device_get_softc(dev); 445 u_int i; 446 447 for (i = 0; i < np->num_queues; i++) { 448 XN_RX_LOCK(&np->rxq[i]); 449 XN_TX_LOCK(&np->txq[i]); 450 } 451 netfront_carrier_off(np); 452 for (i = 0; i < np->num_queues; i++) { 453 XN_RX_UNLOCK(&np->rxq[i]); 454 XN_TX_UNLOCK(&np->txq[i]); 455 } 456 return (0); 457 } 458 459 /** 460 * We are reconnecting to the backend, due to a suspend/resume, or a backend 461 * driver restart. We tear down our netif structure and recreate it, but 462 * leave the device-layer structures intact so that this is transparent to the 463 * rest of the kernel. 464 */ 465 static int 466 netfront_resume(device_t dev) 467 { 468 struct netfront_info *info = device_get_softc(dev); 469 470 info->xn_resume = true; 471 netif_disconnect_backend(info); 472 return (0); 473 } 474 475 static int 476 write_queue_xenstore_keys(device_t dev, 477 struct netfront_rxq *rxq, 478 struct netfront_txq *txq, 479 struct xs_transaction *xst, bool hierarchy) 480 { 481 int err; 482 const char *message; 483 const char *node = xenbus_get_node(dev); 484 char *path; 485 size_t path_size; 486 487 KASSERT(rxq->id == txq->id, ("Mismatch between RX and TX queue ids")); 488 /* Split event channel support is not yet there. */ 489 KASSERT(rxq->xen_intr_handle == txq->xen_intr_handle, 490 ("Split event channels are not supported")); 491 492 if (hierarchy) { 493 path_size = strlen(node) + 10; 494 path = malloc(path_size, M_DEVBUF, M_WAITOK|M_ZERO); 495 snprintf(path, path_size, "%s/queue-%u", node, rxq->id); 496 } else { 497 path_size = strlen(node) + 1; 498 path = malloc(path_size, M_DEVBUF, M_WAITOK|M_ZERO); 499 snprintf(path, path_size, "%s", node); 500 } 501 502 err = xs_printf(*xst, path, "tx-ring-ref","%u", txq->ring_ref); 503 if (err != 0) { 504 message = "writing tx ring-ref"; 505 goto error; 506 } 507 err = xs_printf(*xst, path, "rx-ring-ref","%u", rxq->ring_ref); 508 if (err != 0) { 509 message = "writing rx ring-ref"; 510 goto error; 511 } 512 err = xs_printf(*xst, path, "event-channel", "%u", 513 xen_intr_port(rxq->xen_intr_handle)); 514 if (err != 0) { 515 message = "writing event-channel"; 516 goto error; 517 } 518 519 free(path, M_DEVBUF); 520 521 return (0); 522 523 error: 524 free(path, M_DEVBUF); 525 xenbus_dev_fatal(dev, err, "%s", message); 526 527 return (err); 528 } 529 530 /* Common code used when first setting up, and when resuming. */ 531 static int 532 talk_to_backend(device_t dev, struct netfront_info *info) 533 { 534 const char *message; 535 struct xs_transaction xst; 536 const char *node = xenbus_get_node(dev); 537 int err; 538 unsigned long num_queues, max_queues = 0; 539 unsigned int i; 540 541 err = xen_net_read_mac(dev, info->mac); 542 if (err != 0) { 543 xenbus_dev_fatal(dev, err, "parsing %s/mac", node); 544 goto out; 545 } 546 547 err = xs_scanf(XST_NIL, xenbus_get_otherend_path(info->xbdev), 548 "multi-queue-max-queues", NULL, "%lu", &max_queues); 549 if (err != 0) 550 max_queues = 1; 551 num_queues = xn_num_queues; 552 if (num_queues > max_queues) 553 num_queues = max_queues; 554 555 err = setup_device(dev, info, num_queues); 556 if (err != 0) 557 goto out; 558 559 again: 560 err = xs_transaction_start(&xst); 561 if (err != 0) { 562 xenbus_dev_fatal(dev, err, "starting transaction"); 563 goto free; 564 } 565 566 if (info->num_queues == 1) { 567 err = write_queue_xenstore_keys(dev, &info->rxq[0], 568 &info->txq[0], &xst, false); 569 if (err != 0) 570 goto abort_transaction_no_def_error; 571 } else { 572 err = xs_printf(xst, node, "multi-queue-num-queues", 573 "%u", info->num_queues); 574 if (err != 0) { 575 message = "writing multi-queue-num-queues"; 576 goto abort_transaction; 577 } 578 579 for (i = 0; i < info->num_queues; i++) { 580 err = write_queue_xenstore_keys(dev, &info->rxq[i], 581 &info->txq[i], &xst, true); 582 if (err != 0) 583 goto abort_transaction_no_def_error; 584 } 585 } 586 587 err = xs_printf(xst, node, "request-rx-copy", "%u", 1); 588 if (err != 0) { 589 message = "writing request-rx-copy"; 590 goto abort_transaction; 591 } 592 err = xs_printf(xst, node, "feature-rx-notify", "%d", 1); 593 if (err != 0) { 594 message = "writing feature-rx-notify"; 595 goto abort_transaction; 596 } 597 err = xs_printf(xst, node, "feature-sg", "%d", 1); 598 if (err != 0) { 599 message = "writing feature-sg"; 600 goto abort_transaction; 601 } 602 err = xs_printf(xst, node, "feature-gso-tcpv4", "%d", 1); 603 if (err != 0) { 604 message = "writing feature-gso-tcpv4"; 605 goto abort_transaction; 606 } 607 608 err = xs_transaction_end(xst, 0); 609 if (err != 0) { 610 if (err == EAGAIN) 611 goto again; 612 xenbus_dev_fatal(dev, err, "completing transaction"); 613 goto free; 614 } 615 616 return 0; 617 618 abort_transaction: 619 xenbus_dev_fatal(dev, err, "%s", message); 620 abort_transaction_no_def_error: 621 xs_transaction_end(xst, 1); 622 free: 623 netif_free(info); 624 out: 625 return (err); 626 } 627 628 static void 629 xn_rxq_tq_intr(void *xrxq, int pending) 630 { 631 struct netfront_rxq *rxq = xrxq; 632 633 XN_RX_LOCK(rxq); 634 xn_rxeof(rxq); 635 XN_RX_UNLOCK(rxq); 636 } 637 638 static void 639 xn_txq_start(struct netfront_txq *txq) 640 { 641 struct netfront_info *np = txq->info; 642 struct ifnet *ifp = np->xn_ifp; 643 644 XN_TX_LOCK_ASSERT(txq); 645 if (!drbr_empty(ifp, txq->br)) 646 xn_txq_mq_start_locked(txq, NULL); 647 } 648 649 static void 650 xn_txq_tq_intr(void *xtxq, int pending) 651 { 652 struct netfront_txq *txq = xtxq; 653 654 XN_TX_LOCK(txq); 655 if (RING_HAS_UNCONSUMED_RESPONSES(&txq->ring)) 656 xn_txeof(txq); 657 xn_txq_start(txq); 658 XN_TX_UNLOCK(txq); 659 } 660 661 static void 662 xn_txq_tq_deferred(void *xtxq, int pending) 663 { 664 struct netfront_txq *txq = xtxq; 665 666 XN_TX_LOCK(txq); 667 xn_txq_start(txq); 668 XN_TX_UNLOCK(txq); 669 } 670 671 static void 672 disconnect_rxq(struct netfront_rxq *rxq) 673 { 674 675 xn_release_rx_bufs(rxq); 676 gnttab_free_grant_references(rxq->gref_head); 677 gnttab_end_foreign_access_ref(rxq->ring_ref); 678 /* 679 * No split event channel support at the moment, handle will 680 * be unbound in tx. So no need to call xen_intr_unbind here, 681 * but we do want to reset the handler to 0. 682 */ 683 rxq->xen_intr_handle = 0; 684 } 685 686 static void 687 destroy_rxq(struct netfront_rxq *rxq) 688 { 689 690 free(rxq->ring.sring, M_DEVBUF); 691 taskqueue_drain_all(rxq->tq); 692 taskqueue_free(rxq->tq); 693 } 694 695 static void 696 destroy_rxqs(struct netfront_info *np) 697 { 698 int i; 699 700 for (i = 0; i < np->num_queues; i++) 701 destroy_rxq(&np->rxq[i]); 702 703 free(np->rxq, M_DEVBUF); 704 np->rxq = NULL; 705 } 706 707 static int 708 setup_rxqs(device_t dev, struct netfront_info *info, 709 unsigned long num_queues) 710 { 711 int q, i; 712 int error; 713 netif_rx_sring_t *rxs; 714 struct netfront_rxq *rxq; 715 716 info->rxq = malloc(sizeof(struct netfront_rxq) * num_queues, 717 M_DEVBUF, M_WAITOK|M_ZERO); 718 719 for (q = 0; q < num_queues; q++) { 720 rxq = &info->rxq[q]; 721 722 rxq->id = q; 723 rxq->info = info; 724 rxq->target = RX_MIN_TARGET; 725 rxq->ring_ref = GRANT_REF_INVALID; 726 rxq->ring.sring = NULL; 727 snprintf(rxq->name, XN_QUEUE_NAME_LEN, "xnrx_%u", q); 728 mtx_init(&rxq->lock, rxq->name, "netfront receive lock", 729 MTX_DEF); 730 731 for (i = 0; i <= NET_RX_RING_SIZE; i++) { 732 rxq->mbufs[i] = NULL; 733 rxq->grant_ref[i] = GRANT_REF_INVALID; 734 } 735 736 mbufq_init(&rxq->batch, INT_MAX); 737 738 /* Start resources allocation */ 739 740 if (gnttab_alloc_grant_references(RX_MAX_TARGET, 741 &rxq->gref_head) != 0) { 742 device_printf(dev, "allocating rx gref"); 743 error = ENOMEM; 744 goto fail; 745 } 746 747 rxs = (netif_rx_sring_t *)malloc(PAGE_SIZE, M_DEVBUF, 748 M_WAITOK|M_ZERO); 749 SHARED_RING_INIT(rxs); 750 FRONT_RING_INIT(&rxq->ring, rxs, PAGE_SIZE); 751 752 error = xenbus_grant_ring(dev, virt_to_mfn(rxs), 753 &rxq->ring_ref); 754 if (error != 0) { 755 device_printf(dev, "granting rx ring page"); 756 goto fail_grant_ring; 757 } 758 759 TASK_INIT(&rxq->intrtask, 0, xn_rxq_tq_intr, rxq); 760 rxq->tq = taskqueue_create_fast(rxq->name, M_WAITOK, 761 taskqueue_thread_enqueue, &rxq->tq); 762 763 error = taskqueue_start_threads(&rxq->tq, 1, PI_NET, 764 "%s rxq %d", device_get_nameunit(dev), rxq->id); 765 if (error != 0) { 766 device_printf(dev, "failed to start rx taskq %d\n", 767 rxq->id); 768 goto fail_start_thread; 769 } 770 } 771 772 return (0); 773 774 fail_start_thread: 775 gnttab_end_foreign_access_ref(rxq->ring_ref); 776 taskqueue_drain_all(rxq->tq); 777 taskqueue_free(rxq->tq); 778 fail_grant_ring: 779 gnttab_free_grant_references(rxq->gref_head); 780 free(rxq->ring.sring, M_DEVBUF); 781 fail: 782 for (; q >= 0; q--) { 783 disconnect_rxq(&info->rxq[q]); 784 destroy_rxq(&info->rxq[q]); 785 } 786 787 free(info->rxq, M_DEVBUF); 788 return (error); 789 } 790 791 static void 792 disconnect_txq(struct netfront_txq *txq) 793 { 794 795 xn_release_tx_bufs(txq); 796 gnttab_free_grant_references(txq->gref_head); 797 gnttab_end_foreign_access_ref(txq->ring_ref); 798 xen_intr_unbind(&txq->xen_intr_handle); 799 } 800 801 static void 802 destroy_txq(struct netfront_txq *txq) 803 { 804 805 free(txq->ring.sring, M_DEVBUF); 806 buf_ring_free(txq->br, M_DEVBUF); 807 taskqueue_drain_all(txq->tq); 808 taskqueue_free(txq->tq); 809 } 810 811 static void 812 destroy_txqs(struct netfront_info *np) 813 { 814 int i; 815 816 for (i = 0; i < np->num_queues; i++) 817 destroy_txq(&np->txq[i]); 818 819 free(np->txq, M_DEVBUF); 820 np->txq = NULL; 821 } 822 823 static int 824 setup_txqs(device_t dev, struct netfront_info *info, 825 unsigned long num_queues) 826 { 827 int q, i; 828 int error; 829 netif_tx_sring_t *txs; 830 struct netfront_txq *txq; 831 832 info->txq = malloc(sizeof(struct netfront_txq) * num_queues, 833 M_DEVBUF, M_WAITOK|M_ZERO); 834 835 for (q = 0; q < num_queues; q++) { 836 txq = &info->txq[q]; 837 838 txq->id = q; 839 txq->info = info; 840 841 txq->ring_ref = GRANT_REF_INVALID; 842 txq->ring.sring = NULL; 843 844 snprintf(txq->name, XN_QUEUE_NAME_LEN, "xntx_%u", q); 845 846 mtx_init(&txq->lock, txq->name, "netfront transmit lock", 847 MTX_DEF); 848 849 for (i = 0; i <= NET_TX_RING_SIZE; i++) { 850 txq->mbufs[i] = (void *) ((u_long) i+1); 851 txq->grant_ref[i] = GRANT_REF_INVALID; 852 } 853 txq->mbufs[NET_TX_RING_SIZE] = (void *)0; 854 855 /* Start resources allocation. */ 856 857 if (gnttab_alloc_grant_references(NET_TX_RING_SIZE, 858 &txq->gref_head) != 0) { 859 device_printf(dev, "failed to allocate tx grant refs\n"); 860 error = ENOMEM; 861 goto fail; 862 } 863 864 txs = (netif_tx_sring_t *)malloc(PAGE_SIZE, M_DEVBUF, 865 M_WAITOK|M_ZERO); 866 SHARED_RING_INIT(txs); 867 FRONT_RING_INIT(&txq->ring, txs, PAGE_SIZE); 868 869 error = xenbus_grant_ring(dev, virt_to_mfn(txs), 870 &txq->ring_ref); 871 if (error != 0) { 872 device_printf(dev, "failed to grant tx ring\n"); 873 goto fail_grant_ring; 874 } 875 876 txq->br = buf_ring_alloc(NET_TX_RING_SIZE, M_DEVBUF, 877 M_WAITOK, &txq->lock); 878 TASK_INIT(&txq->defrtask, 0, xn_txq_tq_deferred, txq); 879 TASK_INIT(&txq->intrtask, 0, xn_txq_tq_intr, txq); 880 881 txq->tq = taskqueue_create_fast(txq->name, M_WAITOK, 882 taskqueue_thread_enqueue, &txq->tq); 883 884 error = taskqueue_start_threads(&txq->tq, 1, PI_NET, 885 "%s txq %d", device_get_nameunit(dev), txq->id); 886 if (error != 0) { 887 device_printf(dev, "failed to start tx taskq %d\n", 888 txq->id); 889 goto fail_start_thread; 890 } 891 892 error = xen_intr_alloc_and_bind_local_port(dev, 893 xenbus_get_otherend_id(dev), xn_intr, /* handler */ NULL, 894 &info->txq[q], 895 INTR_TYPE_NET | INTR_MPSAFE | INTR_ENTROPY, 896 &txq->xen_intr_handle); 897 898 if (error != 0) { 899 device_printf(dev, "xen_intr_alloc_and_bind_local_port failed\n"); 900 goto fail_bind_port; 901 } 902 } 903 904 return (0); 905 906 fail_bind_port: 907 taskqueue_drain_all(txq->tq); 908 fail_start_thread: 909 gnttab_free_grant_references(txq->gref_head); 910 free(txq->ring.sring, M_DEVBUF); 911 gnttab_end_foreign_access_ref(txq->ring_ref); 912 buf_ring_free(txq->br, M_DEVBUF); 913 taskqueue_free(txq->tq); 914 fail_grant_ring: 915 gnttab_free_grant_references(txq->gref_head); 916 free(txq->ring.sring, M_DEVBUF); 917 fail: 918 for (; q >= 0; q--) { 919 disconnect_txq(&info->txq[q]); 920 destroy_txq(&info->txq[q]); 921 } 922 923 free(info->txq, M_DEVBUF); 924 return (error); 925 } 926 927 static int 928 setup_device(device_t dev, struct netfront_info *info, 929 unsigned long num_queues) 930 { 931 int error; 932 int q; 933 934 if (info->txq) 935 destroy_txqs(info); 936 937 if (info->rxq) 938 destroy_rxqs(info); 939 940 info->num_queues = 0; 941 942 error = setup_rxqs(dev, info, num_queues); 943 if (error != 0) 944 goto out; 945 error = setup_txqs(dev, info, num_queues); 946 if (error != 0) 947 goto out; 948 949 info->num_queues = num_queues; 950 951 /* No split event channel at the moment. */ 952 for (q = 0; q < num_queues; q++) 953 info->rxq[q].xen_intr_handle = info->txq[q].xen_intr_handle; 954 955 return (0); 956 957 out: 958 KASSERT(error != 0, ("Error path taken without providing an error code")); 959 return (error); 960 } 961 962 #ifdef INET 963 /** 964 * If this interface has an ipv4 address, send an arp for it. This 965 * helps to get the network going again after migrating hosts. 966 */ 967 static void 968 netfront_send_fake_arp(device_t dev, struct netfront_info *info) 969 { 970 struct ifnet *ifp; 971 struct ifaddr *ifa; 972 973 ifp = info->xn_ifp; 974 TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { 975 if (ifa->ifa_addr->sa_family == AF_INET) { 976 arp_ifinit(ifp, ifa); 977 } 978 } 979 } 980 #endif 981 982 /** 983 * Callback received when the backend's state changes. 984 */ 985 static void 986 netfront_backend_changed(device_t dev, XenbusState newstate) 987 { 988 struct netfront_info *sc = device_get_softc(dev); 989 990 DPRINTK("newstate=%d\n", newstate); 991 992 switch (newstate) { 993 case XenbusStateInitialising: 994 case XenbusStateInitialised: 995 case XenbusStateUnknown: 996 case XenbusStateClosed: 997 case XenbusStateReconfigured: 998 case XenbusStateReconfiguring: 999 break; 1000 case XenbusStateInitWait: 1001 if (xenbus_get_state(dev) != XenbusStateInitialising) 1002 break; 1003 if (xn_connect(sc) != 0) 1004 break; 1005 xenbus_set_state(dev, XenbusStateConnected); 1006 break; 1007 case XenbusStateClosing: 1008 xenbus_set_state(dev, XenbusStateClosed); 1009 break; 1010 case XenbusStateConnected: 1011 #ifdef INET 1012 netfront_send_fake_arp(dev, sc); 1013 #endif 1014 break; 1015 } 1016 } 1017 1018 /** 1019 * \brief Verify that there is sufficient space in the Tx ring 1020 * buffer for a maximally sized request to be enqueued. 1021 * 1022 * A transmit request requires a transmit descriptor for each packet 1023 * fragment, plus up to 2 entries for "options" (e.g. TSO). 1024 */ 1025 static inline int 1026 xn_tx_slot_available(struct netfront_txq *txq) 1027 { 1028 1029 return (RING_FREE_REQUESTS(&txq->ring) > (MAX_TX_REQ_FRAGS + 2)); 1030 } 1031 1032 static void 1033 xn_release_tx_bufs(struct netfront_txq *txq) 1034 { 1035 int i; 1036 1037 for (i = 1; i <= NET_TX_RING_SIZE; i++) { 1038 struct mbuf *m; 1039 1040 m = txq->mbufs[i]; 1041 1042 /* 1043 * We assume that no kernel addresses are 1044 * less than NET_TX_RING_SIZE. Any entry 1045 * in the table that is below this number 1046 * must be an index from free-list tracking. 1047 */ 1048 if (((uintptr_t)m) <= NET_TX_RING_SIZE) 1049 continue; 1050 gnttab_end_foreign_access_ref(txq->grant_ref[i]); 1051 gnttab_release_grant_reference(&txq->gref_head, 1052 txq->grant_ref[i]); 1053 txq->grant_ref[i] = GRANT_REF_INVALID; 1054 add_id_to_freelist(txq->mbufs, i); 1055 txq->mbufs_cnt--; 1056 if (txq->mbufs_cnt < 0) { 1057 panic("%s: tx_chain_cnt must be >= 0", __func__); 1058 } 1059 m_free(m); 1060 } 1061 } 1062 1063 static void 1064 xn_alloc_rx_buffers(struct netfront_rxq *rxq) 1065 { 1066 struct netfront_info *np = rxq->info; 1067 int otherend_id = xenbus_get_otherend_id(np->xbdev); 1068 unsigned short id; 1069 struct mbuf *m_new; 1070 int i, batch_target, notify; 1071 RING_IDX req_prod; 1072 grant_ref_t ref; 1073 netif_rx_request_t *req; 1074 vm_offset_t vaddr; 1075 u_long pfn; 1076 1077 req_prod = rxq->ring.req_prod_pvt; 1078 1079 if (__predict_false(np->carrier == 0)) 1080 return; 1081 1082 /* 1083 * Allocate mbufs greedily, even though we batch updates to the 1084 * receive ring. This creates a less bursty demand on the memory 1085 * allocator, and so should reduce the chance of failed allocation 1086 * requests both for ourself and for other kernel subsystems. 1087 * 1088 * Here we attempt to maintain rx_target buffers in flight, counting 1089 * buffers that we have yet to process in the receive ring. 1090 */ 1091 batch_target = rxq->target - (req_prod - rxq->ring.rsp_cons); 1092 for (i = mbufq_len(&rxq->batch); i < batch_target; i++) { 1093 m_new = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, MJUMPAGESIZE); 1094 if (m_new == NULL) { 1095 if (i != 0) 1096 goto refill; 1097 /* XXX set timer */ 1098 break; 1099 } 1100 m_new->m_len = m_new->m_pkthdr.len = MJUMPAGESIZE; 1101 1102 /* queue the mbufs allocated */ 1103 mbufq_enqueue(&rxq->batch, m_new); 1104 } 1105 1106 /* 1107 * If we've allocated at least half of our target number of entries, 1108 * submit them to the backend - we have enough to make the overhead 1109 * of submission worthwhile. Otherwise wait for more mbufs and 1110 * request entries to become available. 1111 */ 1112 if (i < (rxq->target/2)) { 1113 if (req_prod > rxq->ring.sring->req_prod) 1114 goto push; 1115 return; 1116 } 1117 1118 /* 1119 * Double floating fill target if we risked having the backend 1120 * run out of empty buffers for receive traffic. We define "running 1121 * low" as having less than a fourth of our target buffers free 1122 * at the time we refilled the queue. 1123 */ 1124 if ((req_prod - rxq->ring.sring->rsp_prod) < (rxq->target / 4)) { 1125 rxq->target *= 2; 1126 if (rxq->target > np->rx_max_target) 1127 rxq->target = np->rx_max_target; 1128 } 1129 1130 refill: 1131 for (i = 0; ; i++) { 1132 if ((m_new = mbufq_dequeue(&rxq->batch)) == NULL) 1133 break; 1134 1135 m_new->m_ext.ext_arg1 = (vm_paddr_t *)(uintptr_t)( 1136 vtophys(m_new->m_ext.ext_buf) >> PAGE_SHIFT); 1137 1138 id = xn_rxidx(req_prod + i); 1139 1140 KASSERT(rxq->mbufs[id] == NULL, ("non-NULL xn_rx_chain")); 1141 rxq->mbufs[id] = m_new; 1142 1143 ref = gnttab_claim_grant_reference(&rxq->gref_head); 1144 KASSERT(ref != GNTTAB_LIST_END, 1145 ("reserved grant references exhuasted")); 1146 rxq->grant_ref[id] = ref; 1147 1148 vaddr = mtod(m_new, vm_offset_t); 1149 pfn = vtophys(vaddr) >> PAGE_SHIFT; 1150 req = RING_GET_REQUEST(&rxq->ring, req_prod + i); 1151 1152 gnttab_grant_foreign_access_ref(ref, otherend_id, pfn, 0); 1153 req->id = id; 1154 req->gref = ref; 1155 1156 rxq->pfn_array[i] = 1157 vtophys(mtod(m_new,vm_offset_t)) >> PAGE_SHIFT; 1158 } 1159 1160 KASSERT(i, ("no mbufs processed")); /* should have returned earlier */ 1161 KASSERT(mbufq_len(&rxq->batch) == 0, ("not all mbufs processed")); 1162 /* 1163 * We may have allocated buffers which have entries outstanding 1164 * in the page * update queue -- make sure we flush those first! 1165 */ 1166 wmb(); 1167 1168 /* Above is a suitable barrier to ensure backend will see requests. */ 1169 rxq->ring.req_prod_pvt = req_prod + i; 1170 push: 1171 RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&rxq->ring, notify); 1172 if (notify) 1173 xen_intr_signal(rxq->xen_intr_handle); 1174 } 1175 1176 static void 1177 xn_release_rx_bufs(struct netfront_rxq *rxq) 1178 { 1179 int i, ref; 1180 struct mbuf *m; 1181 1182 for (i = 0; i < NET_RX_RING_SIZE; i++) { 1183 m = rxq->mbufs[i]; 1184 1185 if (m == NULL) 1186 continue; 1187 1188 ref = rxq->grant_ref[i]; 1189 if (ref == GRANT_REF_INVALID) 1190 continue; 1191 1192 gnttab_end_foreign_access_ref(ref); 1193 gnttab_release_grant_reference(&rxq->gref_head, ref); 1194 rxq->mbufs[i] = NULL; 1195 rxq->grant_ref[i] = GRANT_REF_INVALID; 1196 m_freem(m); 1197 } 1198 } 1199 1200 static void 1201 xn_rxeof(struct netfront_rxq *rxq) 1202 { 1203 struct ifnet *ifp; 1204 struct netfront_info *np = rxq->info; 1205 #if (defined(INET) || defined(INET6)) 1206 struct lro_ctrl *lro = &rxq->lro; 1207 struct lro_entry *queued; 1208 #endif 1209 struct netfront_rx_info rinfo; 1210 struct netif_rx_response *rx = &rinfo.rx; 1211 struct netif_extra_info *extras = rinfo.extras; 1212 RING_IDX i, rp; 1213 struct mbuf *m; 1214 struct mbufq mbufq_rxq, mbufq_errq; 1215 int err, work_to_do; 1216 1217 do { 1218 XN_RX_LOCK_ASSERT(rxq); 1219 if (!netfront_carrier_ok(np)) 1220 return; 1221 1222 /* XXX: there should be some sane limit. */ 1223 mbufq_init(&mbufq_errq, INT_MAX); 1224 mbufq_init(&mbufq_rxq, INT_MAX); 1225 1226 ifp = np->xn_ifp; 1227 1228 rp = rxq->ring.sring->rsp_prod; 1229 rmb(); /* Ensure we see queued responses up to 'rp'. */ 1230 1231 i = rxq->ring.rsp_cons; 1232 while ((i != rp)) { 1233 memcpy(rx, RING_GET_RESPONSE(&rxq->ring, i), sizeof(*rx)); 1234 memset(extras, 0, sizeof(rinfo.extras)); 1235 1236 m = NULL; 1237 err = xn_get_responses(rxq, &rinfo, rp, &i, &m); 1238 1239 if (__predict_false(err)) { 1240 if (m) 1241 (void )mbufq_enqueue(&mbufq_errq, m); 1242 rxq->stats.rx_errors++; 1243 continue; 1244 } 1245 1246 m->m_pkthdr.rcvif = ifp; 1247 if ( rx->flags & NETRXF_data_validated ) { 1248 /* Tell the stack the checksums are okay */ 1249 /* 1250 * XXX this isn't necessarily the case - need to add 1251 * check 1252 */ 1253 1254 m->m_pkthdr.csum_flags |= 1255 (CSUM_IP_CHECKED | CSUM_IP_VALID | CSUM_DATA_VALID 1256 | CSUM_PSEUDO_HDR); 1257 m->m_pkthdr.csum_data = 0xffff; 1258 } 1259 1260 rxq->stats.rx_packets++; 1261 rxq->stats.rx_bytes += m->m_pkthdr.len; 1262 1263 (void )mbufq_enqueue(&mbufq_rxq, m); 1264 rxq->ring.rsp_cons = i; 1265 } 1266 1267 mbufq_drain(&mbufq_errq); 1268 1269 /* 1270 * Process all the mbufs after the remapping is complete. 1271 * Break the mbuf chain first though. 1272 */ 1273 while ((m = mbufq_dequeue(&mbufq_rxq)) != NULL) { 1274 if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); 1275 1276 /* XXX: Do we really need to drop the rx lock? */ 1277 XN_RX_UNLOCK(rxq); 1278 #if (defined(INET) || defined(INET6)) 1279 /* Use LRO if possible */ 1280 if ((ifp->if_capenable & IFCAP_LRO) == 0 || 1281 lro->lro_cnt == 0 || tcp_lro_rx(lro, m, 0)) { 1282 /* 1283 * If LRO fails, pass up to the stack 1284 * directly. 1285 */ 1286 (*ifp->if_input)(ifp, m); 1287 } 1288 #else 1289 (*ifp->if_input)(ifp, m); 1290 #endif 1291 1292 XN_RX_LOCK(rxq); 1293 } 1294 1295 rxq->ring.rsp_cons = i; 1296 1297 #if (defined(INET) || defined(INET6)) 1298 /* 1299 * Flush any outstanding LRO work 1300 */ 1301 while (!SLIST_EMPTY(&lro->lro_active)) { 1302 queued = SLIST_FIRST(&lro->lro_active); 1303 SLIST_REMOVE_HEAD(&lro->lro_active, next); 1304 tcp_lro_flush(lro, queued); 1305 } 1306 #endif 1307 1308 xn_alloc_rx_buffers(rxq); 1309 1310 RING_FINAL_CHECK_FOR_RESPONSES(&rxq->ring, work_to_do); 1311 } while (work_to_do); 1312 } 1313 1314 static void 1315 xn_txeof(struct netfront_txq *txq) 1316 { 1317 RING_IDX i, prod; 1318 unsigned short id; 1319 struct ifnet *ifp; 1320 netif_tx_response_t *txr; 1321 struct mbuf *m; 1322 struct netfront_info *np = txq->info; 1323 1324 XN_TX_LOCK_ASSERT(txq); 1325 1326 if (!netfront_carrier_ok(np)) 1327 return; 1328 1329 ifp = np->xn_ifp; 1330 1331 do { 1332 prod = txq->ring.sring->rsp_prod; 1333 rmb(); /* Ensure we see responses up to 'rp'. */ 1334 1335 for (i = txq->ring.rsp_cons; i != prod; i++) { 1336 txr = RING_GET_RESPONSE(&txq->ring, i); 1337 if (txr->status == NETIF_RSP_NULL) 1338 continue; 1339 1340 if (txr->status != NETIF_RSP_OKAY) { 1341 printf("%s: WARNING: response is %d!\n", 1342 __func__, txr->status); 1343 } 1344 id = txr->id; 1345 m = txq->mbufs[id]; 1346 KASSERT(m != NULL, ("mbuf not found in chain")); 1347 KASSERT((uintptr_t)m > NET_TX_RING_SIZE, 1348 ("mbuf already on the free list, but we're " 1349 "trying to free it again!")); 1350 M_ASSERTVALID(m); 1351 1352 /* 1353 * Increment packet count if this is the last 1354 * mbuf of the chain. 1355 */ 1356 if (!m->m_next) 1357 if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); 1358 if (__predict_false(gnttab_query_foreign_access( 1359 txq->grant_ref[id]) != 0)) { 1360 panic("%s: grant id %u still in use by the " 1361 "backend", __func__, id); 1362 } 1363 gnttab_end_foreign_access_ref(txq->grant_ref[id]); 1364 gnttab_release_grant_reference( 1365 &txq->gref_head, txq->grant_ref[id]); 1366 txq->grant_ref[id] = GRANT_REF_INVALID; 1367 1368 txq->mbufs[id] = NULL; 1369 add_id_to_freelist(txq->mbufs, id); 1370 txq->mbufs_cnt--; 1371 m_free(m); 1372 /* Only mark the txq active if we've freed up at least one slot to try */ 1373 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; 1374 } 1375 txq->ring.rsp_cons = prod; 1376 1377 /* 1378 * Set a new event, then check for race with update of 1379 * tx_cons. Note that it is essential to schedule a 1380 * callback, no matter how few buffers are pending. Even if 1381 * there is space in the transmit ring, higher layers may 1382 * be blocked because too much data is outstanding: in such 1383 * cases notification from Xen is likely to be the only kick 1384 * that we'll get. 1385 */ 1386 txq->ring.sring->rsp_event = 1387 prod + ((txq->ring.sring->req_prod - prod) >> 1) + 1; 1388 1389 mb(); 1390 } while (prod != txq->ring.sring->rsp_prod); 1391 1392 if (txq->full && 1393 ((txq->ring.sring->req_prod - prod) < NET_TX_RING_SIZE)) { 1394 txq->full = false; 1395 taskqueue_enqueue(txq->tq, &txq->intrtask); 1396 } 1397 } 1398 1399 1400 static void 1401 xn_rxq_intr(void *xrxq) 1402 { 1403 struct netfront_rxq *rxq = xrxq; 1404 1405 taskqueue_enqueue_fast(rxq->tq, &rxq->intrtask); 1406 } 1407 1408 static void 1409 xn_txq_intr(void *xtxq) 1410 { 1411 struct netfront_txq *txq = xtxq; 1412 1413 taskqueue_enqueue_fast(txq->tq, &txq->intrtask); 1414 } 1415 1416 static int 1417 xn_intr(void *xsc) 1418 { 1419 struct netfront_txq *txq = xsc; 1420 struct netfront_info *np = txq->info; 1421 struct netfront_rxq *rxq = &np->rxq[txq->id]; 1422 1423 /* kick both tx and rx */ 1424 xn_rxq_intr(rxq); 1425 xn_txq_intr(txq); 1426 1427 return (FILTER_HANDLED); 1428 } 1429 1430 static void 1431 xn_move_rx_slot(struct netfront_rxq *rxq, struct mbuf *m, 1432 grant_ref_t ref) 1433 { 1434 int new = xn_rxidx(rxq->ring.req_prod_pvt); 1435 1436 KASSERT(rxq->mbufs[new] == NULL, ("mbufs != NULL")); 1437 rxq->mbufs[new] = m; 1438 rxq->grant_ref[new] = ref; 1439 RING_GET_REQUEST(&rxq->ring, rxq->ring.req_prod_pvt)->id = new; 1440 RING_GET_REQUEST(&rxq->ring, rxq->ring.req_prod_pvt)->gref = ref; 1441 rxq->ring.req_prod_pvt++; 1442 } 1443 1444 static int 1445 xn_get_extras(struct netfront_rxq *rxq, 1446 struct netif_extra_info *extras, RING_IDX rp, RING_IDX *cons) 1447 { 1448 struct netif_extra_info *extra; 1449 1450 int err = 0; 1451 1452 do { 1453 struct mbuf *m; 1454 grant_ref_t ref; 1455 1456 if (__predict_false(*cons + 1 == rp)) { 1457 err = EINVAL; 1458 break; 1459 } 1460 1461 extra = (struct netif_extra_info *) 1462 RING_GET_RESPONSE(&rxq->ring, ++(*cons)); 1463 1464 if (__predict_false(!extra->type || 1465 extra->type >= XEN_NETIF_EXTRA_TYPE_MAX)) { 1466 err = EINVAL; 1467 } else { 1468 memcpy(&extras[extra->type - 1], extra, sizeof(*extra)); 1469 } 1470 1471 m = xn_get_rx_mbuf(rxq, *cons); 1472 ref = xn_get_rx_ref(rxq, *cons); 1473 xn_move_rx_slot(rxq, m, ref); 1474 } while (extra->flags & XEN_NETIF_EXTRA_FLAG_MORE); 1475 1476 return err; 1477 } 1478 1479 static int 1480 xn_get_responses(struct netfront_rxq *rxq, 1481 struct netfront_rx_info *rinfo, RING_IDX rp, RING_IDX *cons, 1482 struct mbuf **list) 1483 { 1484 struct netif_rx_response *rx = &rinfo->rx; 1485 struct netif_extra_info *extras = rinfo->extras; 1486 struct mbuf *m, *m0, *m_prev; 1487 grant_ref_t ref = xn_get_rx_ref(rxq, *cons); 1488 RING_IDX ref_cons = *cons; 1489 int frags = 1; 1490 int err = 0; 1491 u_long ret; 1492 1493 m0 = m = m_prev = xn_get_rx_mbuf(rxq, *cons); 1494 1495 if (rx->flags & NETRXF_extra_info) { 1496 err = xn_get_extras(rxq, extras, rp, cons); 1497 } 1498 1499 if (m0 != NULL) { 1500 m0->m_pkthdr.len = 0; 1501 m0->m_next = NULL; 1502 } 1503 1504 for (;;) { 1505 #if 0 1506 DPRINTK("rx->status=%hd rx->offset=%hu frags=%u\n", 1507 rx->status, rx->offset, frags); 1508 #endif 1509 if (__predict_false(rx->status < 0 || 1510 rx->offset + rx->status > PAGE_SIZE)) { 1511 1512 xn_move_rx_slot(rxq, m, ref); 1513 if (m0 == m) 1514 m0 = NULL; 1515 m = NULL; 1516 err = EINVAL; 1517 goto next_skip_queue; 1518 } 1519 1520 /* 1521 * This definitely indicates a bug, either in this driver or in 1522 * the backend driver. In future this should flag the bad 1523 * situation to the system controller to reboot the backed. 1524 */ 1525 if (ref == GRANT_REF_INVALID) { 1526 printf("%s: Bad rx response id %d.\n", __func__, rx->id); 1527 err = EINVAL; 1528 goto next; 1529 } 1530 1531 ret = gnttab_end_foreign_access_ref(ref); 1532 KASSERT(ret, ("Unable to end access to grant references")); 1533 1534 gnttab_release_grant_reference(&rxq->gref_head, ref); 1535 1536 next: 1537 if (m == NULL) 1538 break; 1539 1540 m->m_len = rx->status; 1541 m->m_data += rx->offset; 1542 m0->m_pkthdr.len += rx->status; 1543 1544 next_skip_queue: 1545 if (!(rx->flags & NETRXF_more_data)) 1546 break; 1547 1548 if (*cons + frags == rp) { 1549 if (net_ratelimit()) 1550 WPRINTK("Need more frags\n"); 1551 err = ENOENT; 1552 printf("%s: cons %u frags %u rp %u, not enough frags\n", 1553 __func__, *cons, frags, rp); 1554 break; 1555 } 1556 /* 1557 * Note that m can be NULL, if rx->status < 0 or if 1558 * rx->offset + rx->status > PAGE_SIZE above. 1559 */ 1560 m_prev = m; 1561 1562 rx = RING_GET_RESPONSE(&rxq->ring, *cons + frags); 1563 m = xn_get_rx_mbuf(rxq, *cons + frags); 1564 1565 /* 1566 * m_prev == NULL can happen if rx->status < 0 or if 1567 * rx->offset + * rx->status > PAGE_SIZE above. 1568 */ 1569 if (m_prev != NULL) 1570 m_prev->m_next = m; 1571 1572 /* 1573 * m0 can be NULL if rx->status < 0 or if * rx->offset + 1574 * rx->status > PAGE_SIZE above. 1575 */ 1576 if (m0 == NULL) 1577 m0 = m; 1578 m->m_next = NULL; 1579 ref = xn_get_rx_ref(rxq, *cons + frags); 1580 ref_cons = *cons + frags; 1581 frags++; 1582 } 1583 *list = m0; 1584 *cons += frags; 1585 1586 return (err); 1587 } 1588 1589 /** 1590 * \brief Count the number of fragments in an mbuf chain. 1591 * 1592 * Surprisingly, there isn't an M* macro for this. 1593 */ 1594 static inline int 1595 xn_count_frags(struct mbuf *m) 1596 { 1597 int nfrags; 1598 1599 for (nfrags = 0; m != NULL; m = m->m_next) 1600 nfrags++; 1601 1602 return (nfrags); 1603 } 1604 1605 /** 1606 * Given an mbuf chain, make sure we have enough room and then push 1607 * it onto the transmit ring. 1608 */ 1609 static int 1610 xn_assemble_tx_request(struct netfront_txq *txq, struct mbuf *m_head) 1611 { 1612 struct mbuf *m; 1613 struct netfront_info *np = txq->info; 1614 struct ifnet *ifp = np->xn_ifp; 1615 u_int nfrags; 1616 int otherend_id; 1617 1618 /** 1619 * Defragment the mbuf if necessary. 1620 */ 1621 nfrags = xn_count_frags(m_head); 1622 1623 /* 1624 * Check to see whether this request is longer than netback 1625 * can handle, and try to defrag it. 1626 */ 1627 /** 1628 * It is a bit lame, but the netback driver in Linux can't 1629 * deal with nfrags > MAX_TX_REQ_FRAGS, which is a quirk of 1630 * the Linux network stack. 1631 */ 1632 if (nfrags > np->maxfrags) { 1633 m = m_defrag(m_head, M_NOWAIT); 1634 if (!m) { 1635 /* 1636 * Defrag failed, so free the mbuf and 1637 * therefore drop the packet. 1638 */ 1639 m_freem(m_head); 1640 return (EMSGSIZE); 1641 } 1642 m_head = m; 1643 } 1644 1645 /* Determine how many fragments now exist */ 1646 nfrags = xn_count_frags(m_head); 1647 1648 /* 1649 * Check to see whether the defragmented packet has too many 1650 * segments for the Linux netback driver. 1651 */ 1652 /** 1653 * The FreeBSD TCP stack, with TSO enabled, can produce a chain 1654 * of mbufs longer than Linux can handle. Make sure we don't 1655 * pass a too-long chain over to the other side by dropping the 1656 * packet. It doesn't look like there is currently a way to 1657 * tell the TCP stack to generate a shorter chain of packets. 1658 */ 1659 if (nfrags > MAX_TX_REQ_FRAGS) { 1660 #ifdef DEBUG 1661 printf("%s: nfrags %d > MAX_TX_REQ_FRAGS %d, netback " 1662 "won't be able to handle it, dropping\n", 1663 __func__, nfrags, MAX_TX_REQ_FRAGS); 1664 #endif 1665 m_freem(m_head); 1666 return (EMSGSIZE); 1667 } 1668 1669 /* 1670 * This check should be redundant. We've already verified that we 1671 * have enough slots in the ring to handle a packet of maximum 1672 * size, and that our packet is less than the maximum size. Keep 1673 * it in here as an assert for now just to make certain that 1674 * chain_cnt is accurate. 1675 */ 1676 KASSERT((txq->mbufs_cnt + nfrags) <= NET_TX_RING_SIZE, 1677 ("%s: chain_cnt (%d) + nfrags (%d) > NET_TX_RING_SIZE " 1678 "(%d)!", __func__, (int) txq->mbufs_cnt, 1679 (int) nfrags, (int) NET_TX_RING_SIZE)); 1680 1681 /* 1682 * Start packing the mbufs in this chain into 1683 * the fragment pointers. Stop when we run out 1684 * of fragments or hit the end of the mbuf chain. 1685 */ 1686 m = m_head; 1687 otherend_id = xenbus_get_otherend_id(np->xbdev); 1688 for (m = m_head; m; m = m->m_next) { 1689 netif_tx_request_t *tx; 1690 uintptr_t id; 1691 grant_ref_t ref; 1692 u_long mfn; /* XXX Wrong type? */ 1693 1694 tx = RING_GET_REQUEST(&txq->ring, txq->ring.req_prod_pvt); 1695 id = get_id_from_freelist(txq->mbufs); 1696 if (id == 0) 1697 panic("%s: was allocated the freelist head!\n", 1698 __func__); 1699 txq->mbufs_cnt++; 1700 if (txq->mbufs_cnt > NET_TX_RING_SIZE) 1701 panic("%s: tx_chain_cnt must be <= NET_TX_RING_SIZE\n", 1702 __func__); 1703 txq->mbufs[id] = m; 1704 tx->id = id; 1705 ref = gnttab_claim_grant_reference(&txq->gref_head); 1706 KASSERT((short)ref >= 0, ("Negative ref")); 1707 mfn = virt_to_mfn(mtod(m, vm_offset_t)); 1708 gnttab_grant_foreign_access_ref(ref, otherend_id, 1709 mfn, GNTMAP_readonly); 1710 tx->gref = txq->grant_ref[id] = ref; 1711 tx->offset = mtod(m, vm_offset_t) & (PAGE_SIZE - 1); 1712 tx->flags = 0; 1713 if (m == m_head) { 1714 /* 1715 * The first fragment has the entire packet 1716 * size, subsequent fragments have just the 1717 * fragment size. The backend works out the 1718 * true size of the first fragment by 1719 * subtracting the sizes of the other 1720 * fragments. 1721 */ 1722 tx->size = m->m_pkthdr.len; 1723 1724 /* 1725 * The first fragment contains the checksum flags 1726 * and is optionally followed by extra data for 1727 * TSO etc. 1728 */ 1729 /** 1730 * CSUM_TSO requires checksum offloading. 1731 * Some versions of FreeBSD fail to 1732 * set CSUM_TCP in the CSUM_TSO case, 1733 * so we have to test for CSUM_TSO 1734 * explicitly. 1735 */ 1736 if (m->m_pkthdr.csum_flags 1737 & (CSUM_DELAY_DATA | CSUM_TSO)) { 1738 tx->flags |= (NETTXF_csum_blank 1739 | NETTXF_data_validated); 1740 } 1741 if (m->m_pkthdr.csum_flags & CSUM_TSO) { 1742 struct netif_extra_info *gso = 1743 (struct netif_extra_info *) 1744 RING_GET_REQUEST(&txq->ring, 1745 ++txq->ring.req_prod_pvt); 1746 1747 tx->flags |= NETTXF_extra_info; 1748 1749 gso->u.gso.size = m->m_pkthdr.tso_segsz; 1750 gso->u.gso.type = 1751 XEN_NETIF_GSO_TYPE_TCPV4; 1752 gso->u.gso.pad = 0; 1753 gso->u.gso.features = 0; 1754 1755 gso->type = XEN_NETIF_EXTRA_TYPE_GSO; 1756 gso->flags = 0; 1757 } 1758 } else { 1759 tx->size = m->m_len; 1760 } 1761 if (m->m_next) 1762 tx->flags |= NETTXF_more_data; 1763 1764 txq->ring.req_prod_pvt++; 1765 } 1766 BPF_MTAP(ifp, m_head); 1767 1768 xn_txeof(txq); 1769 1770 txq->stats.tx_bytes += m_head->m_pkthdr.len; 1771 txq->stats.tx_packets++; 1772 1773 return (0); 1774 } 1775 1776 /* equivalent of network_open() in Linux */ 1777 static void 1778 xn_ifinit_locked(struct netfront_info *np) 1779 { 1780 struct ifnet *ifp; 1781 int i; 1782 struct netfront_rxq *rxq; 1783 1784 XN_LOCK_ASSERT(np); 1785 1786 ifp = np->xn_ifp; 1787 1788 if (ifp->if_drv_flags & IFF_DRV_RUNNING) 1789 return; 1790 1791 xn_stop(np); 1792 1793 for (i = 0; i < np->num_queues; i++) { 1794 rxq = &np->rxq[i]; 1795 xn_alloc_rx_buffers(rxq); 1796 rxq->ring.sring->rsp_event = rxq->ring.rsp_cons + 1; 1797 } 1798 1799 ifp->if_drv_flags |= IFF_DRV_RUNNING; 1800 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; 1801 if_link_state_change(ifp, LINK_STATE_UP); 1802 } 1803 1804 static void 1805 xn_ifinit(void *xsc) 1806 { 1807 struct netfront_info *sc = xsc; 1808 1809 XN_LOCK(sc); 1810 xn_ifinit_locked(sc); 1811 XN_UNLOCK(sc); 1812 } 1813 1814 static int 1815 xn_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) 1816 { 1817 struct netfront_info *sc = ifp->if_softc; 1818 struct ifreq *ifr = (struct ifreq *) data; 1819 #ifdef INET 1820 struct ifaddr *ifa = (struct ifaddr *)data; 1821 #endif 1822 1823 int mask, error = 0; 1824 switch(cmd) { 1825 case SIOCSIFADDR: 1826 #ifdef INET 1827 XN_LOCK(sc); 1828 if (ifa->ifa_addr->sa_family == AF_INET) { 1829 ifp->if_flags |= IFF_UP; 1830 if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) 1831 xn_ifinit_locked(sc); 1832 arp_ifinit(ifp, ifa); 1833 XN_UNLOCK(sc); 1834 } else { 1835 XN_UNLOCK(sc); 1836 #endif 1837 error = ether_ioctl(ifp, cmd, data); 1838 #ifdef INET 1839 } 1840 #endif 1841 break; 1842 case SIOCSIFMTU: 1843 ifp->if_mtu = ifr->ifr_mtu; 1844 ifp->if_drv_flags &= ~IFF_DRV_RUNNING; 1845 xn_ifinit(sc); 1846 break; 1847 case SIOCSIFFLAGS: 1848 XN_LOCK(sc); 1849 if (ifp->if_flags & IFF_UP) { 1850 /* 1851 * If only the state of the PROMISC flag changed, 1852 * then just use the 'set promisc mode' command 1853 * instead of reinitializing the entire NIC. Doing 1854 * a full re-init means reloading the firmware and 1855 * waiting for it to start up, which may take a 1856 * second or two. 1857 */ 1858 xn_ifinit_locked(sc); 1859 } else { 1860 if (ifp->if_drv_flags & IFF_DRV_RUNNING) { 1861 xn_stop(sc); 1862 } 1863 } 1864 sc->xn_if_flags = ifp->if_flags; 1865 XN_UNLOCK(sc); 1866 error = 0; 1867 break; 1868 case SIOCSIFCAP: 1869 mask = ifr->ifr_reqcap ^ ifp->if_capenable; 1870 if (mask & IFCAP_TXCSUM) { 1871 if (IFCAP_TXCSUM & ifp->if_capenable) { 1872 ifp->if_capenable &= ~(IFCAP_TXCSUM|IFCAP_TSO4); 1873 ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP 1874 | CSUM_IP | CSUM_TSO); 1875 } else { 1876 ifp->if_capenable |= IFCAP_TXCSUM; 1877 ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP 1878 | CSUM_IP); 1879 } 1880 } 1881 if (mask & IFCAP_RXCSUM) { 1882 ifp->if_capenable ^= IFCAP_RXCSUM; 1883 } 1884 if (mask & IFCAP_TSO4) { 1885 if (IFCAP_TSO4 & ifp->if_capenable) { 1886 ifp->if_capenable &= ~IFCAP_TSO4; 1887 ifp->if_hwassist &= ~CSUM_TSO; 1888 } else if (IFCAP_TXCSUM & ifp->if_capenable) { 1889 ifp->if_capenable |= IFCAP_TSO4; 1890 ifp->if_hwassist |= CSUM_TSO; 1891 } else { 1892 IPRINTK("Xen requires tx checksum offload" 1893 " be enabled to use TSO\n"); 1894 error = EINVAL; 1895 } 1896 } 1897 if (mask & IFCAP_LRO) { 1898 ifp->if_capenable ^= IFCAP_LRO; 1899 1900 } 1901 error = 0; 1902 break; 1903 case SIOCADDMULTI: 1904 case SIOCDELMULTI: 1905 break; 1906 case SIOCSIFMEDIA: 1907 case SIOCGIFMEDIA: 1908 error = ifmedia_ioctl(ifp, ifr, &sc->sc_media, cmd); 1909 break; 1910 default: 1911 error = ether_ioctl(ifp, cmd, data); 1912 } 1913 1914 return (error); 1915 } 1916 1917 static void 1918 xn_stop(struct netfront_info *sc) 1919 { 1920 struct ifnet *ifp; 1921 1922 XN_LOCK_ASSERT(sc); 1923 1924 ifp = sc->xn_ifp; 1925 1926 ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); 1927 if_link_state_change(ifp, LINK_STATE_DOWN); 1928 } 1929 1930 static void 1931 xn_rebuild_rx_bufs(struct netfront_rxq *rxq) 1932 { 1933 int requeue_idx, i; 1934 grant_ref_t ref; 1935 netif_rx_request_t *req; 1936 1937 for (requeue_idx = 0, i = 0; i < NET_RX_RING_SIZE; i++) { 1938 struct mbuf *m; 1939 u_long pfn; 1940 1941 if (rxq->mbufs[i] == NULL) 1942 continue; 1943 1944 m = rxq->mbufs[requeue_idx] = xn_get_rx_mbuf(rxq, i); 1945 ref = rxq->grant_ref[requeue_idx] = xn_get_rx_ref(rxq, i); 1946 1947 req = RING_GET_REQUEST(&rxq->ring, requeue_idx); 1948 pfn = vtophys(mtod(m, vm_offset_t)) >> PAGE_SHIFT; 1949 1950 gnttab_grant_foreign_access_ref(ref, 1951 xenbus_get_otherend_id(rxq->info->xbdev), 1952 pfn, 0); 1953 1954 req->gref = ref; 1955 req->id = requeue_idx; 1956 1957 requeue_idx++; 1958 } 1959 1960 rxq->ring.req_prod_pvt = requeue_idx; 1961 } 1962 1963 /* START of Xenolinux helper functions adapted to FreeBSD */ 1964 int 1965 xn_connect(struct netfront_info *np) 1966 { 1967 int i, error; 1968 u_int feature_rx_copy; 1969 struct netfront_rxq *rxq; 1970 struct netfront_txq *txq; 1971 1972 error = xs_scanf(XST_NIL, xenbus_get_otherend_path(np->xbdev), 1973 "feature-rx-copy", NULL, "%u", &feature_rx_copy); 1974 if (error != 0) 1975 feature_rx_copy = 0; 1976 1977 /* We only support rx copy. */ 1978 if (!feature_rx_copy) 1979 return (EPROTONOSUPPORT); 1980 1981 /* Recovery procedure: */ 1982 error = talk_to_backend(np->xbdev, np); 1983 if (error != 0) 1984 return (error); 1985 1986 /* Step 1: Reinitialise variables. */ 1987 xn_query_features(np); 1988 xn_configure_features(np); 1989 1990 /* Step 2: Release TX buffer */ 1991 for (i = 0; i < np->num_queues; i++) { 1992 txq = &np->txq[i]; 1993 xn_release_tx_bufs(txq); 1994 } 1995 1996 /* Step 3: Rebuild the RX buffer freelist and the RX ring itself. */ 1997 for (i = 0; i < np->num_queues; i++) { 1998 rxq = &np->rxq[i]; 1999 xn_rebuild_rx_bufs(rxq); 2000 } 2001 2002 /* Step 4: All public and private state should now be sane. Get 2003 * ready to start sending and receiving packets and give the driver 2004 * domain a kick because we've probably just requeued some 2005 * packets. 2006 */ 2007 netfront_carrier_on(np); 2008 for (i = 0; i < np->num_queues; i++) { 2009 txq = &np->txq[i]; 2010 xen_intr_signal(txq->xen_intr_handle); 2011 XN_TX_LOCK(txq); 2012 xn_txeof(txq); 2013 XN_TX_UNLOCK(txq); 2014 xn_alloc_rx_buffers(rxq); 2015 } 2016 2017 return (0); 2018 } 2019 2020 static void 2021 xn_query_features(struct netfront_info *np) 2022 { 2023 int val; 2024 2025 device_printf(np->xbdev, "backend features:"); 2026 2027 if (xs_scanf(XST_NIL, xenbus_get_otherend_path(np->xbdev), 2028 "feature-sg", NULL, "%d", &val) < 0) 2029 val = 0; 2030 2031 np->maxfrags = 1; 2032 if (val) { 2033 np->maxfrags = MAX_TX_REQ_FRAGS; 2034 printf(" feature-sg"); 2035 } 2036 2037 if (xs_scanf(XST_NIL, xenbus_get_otherend_path(np->xbdev), 2038 "feature-gso-tcpv4", NULL, "%d", &val) < 0) 2039 val = 0; 2040 2041 np->xn_ifp->if_capabilities &= ~(IFCAP_TSO4|IFCAP_LRO); 2042 if (val) { 2043 np->xn_ifp->if_capabilities |= IFCAP_TSO4|IFCAP_LRO; 2044 printf(" feature-gso-tcp4"); 2045 } 2046 2047 printf("\n"); 2048 } 2049 2050 static int 2051 xn_configure_features(struct netfront_info *np) 2052 { 2053 int err, cap_enabled; 2054 #if (defined(INET) || defined(INET6)) 2055 int i; 2056 #endif 2057 2058 err = 0; 2059 2060 if (np->xn_resume && 2061 ((np->xn_ifp->if_capenable & np->xn_ifp->if_capabilities) 2062 == np->xn_ifp->if_capenable)) { 2063 /* Current options are available, no need to do anything. */ 2064 return (0); 2065 } 2066 2067 /* Try to preserve as many options as possible. */ 2068 if (np->xn_resume) 2069 cap_enabled = np->xn_ifp->if_capenable; 2070 else 2071 cap_enabled = UINT_MAX; 2072 2073 #if (defined(INET) || defined(INET6)) 2074 for (i = 0; i < np->num_queues; i++) 2075 if ((np->xn_ifp->if_capenable & IFCAP_LRO) == 2076 (cap_enabled & IFCAP_LRO)) 2077 tcp_lro_free(&np->rxq[i].lro); 2078 #endif 2079 np->xn_ifp->if_capenable = 2080 np->xn_ifp->if_capabilities & ~(IFCAP_LRO|IFCAP_TSO4) & cap_enabled; 2081 np->xn_ifp->if_hwassist &= ~CSUM_TSO; 2082 #if (defined(INET) || defined(INET6)) 2083 for (i = 0; i < np->num_queues; i++) { 2084 if (xn_enable_lro && (np->xn_ifp->if_capabilities & IFCAP_LRO) == 2085 (cap_enabled & IFCAP_LRO)) { 2086 err = tcp_lro_init(&np->rxq[i].lro); 2087 if (err != 0) { 2088 device_printf(np->xbdev, "LRO initialization failed\n"); 2089 } else { 2090 np->rxq[i].lro.ifp = np->xn_ifp; 2091 np->xn_ifp->if_capenable |= IFCAP_LRO; 2092 } 2093 } 2094 } 2095 if ((np->xn_ifp->if_capabilities & IFCAP_TSO4) == 2096 (cap_enabled & IFCAP_TSO4)) { 2097 np->xn_ifp->if_capenable |= IFCAP_TSO4; 2098 np->xn_ifp->if_hwassist |= CSUM_TSO; 2099 } 2100 #endif 2101 return (err); 2102 } 2103 2104 static int 2105 xn_txq_mq_start_locked(struct netfront_txq *txq, struct mbuf *m) 2106 { 2107 struct netfront_info *np; 2108 struct ifnet *ifp; 2109 struct buf_ring *br; 2110 int error, notify; 2111 2112 np = txq->info; 2113 br = txq->br; 2114 ifp = np->xn_ifp; 2115 error = 0; 2116 2117 XN_TX_LOCK_ASSERT(txq); 2118 2119 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || 2120 !netfront_carrier_ok(np)) { 2121 if (m != NULL) 2122 error = drbr_enqueue(ifp, br, m); 2123 return (error); 2124 } 2125 2126 if (m != NULL) { 2127 error = drbr_enqueue(ifp, br, m); 2128 if (error != 0) 2129 return (error); 2130 } 2131 2132 while ((m = drbr_peek(ifp, br)) != NULL) { 2133 if (!xn_tx_slot_available(txq)) { 2134 drbr_putback(ifp, br, m); 2135 break; 2136 } 2137 2138 error = xn_assemble_tx_request(txq, m); 2139 /* xn_assemble_tx_request always consumes the mbuf*/ 2140 if (error != 0) { 2141 drbr_advance(ifp, br); 2142 break; 2143 } 2144 2145 RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&txq->ring, notify); 2146 if (notify) 2147 xen_intr_signal(txq->xen_intr_handle); 2148 2149 drbr_advance(ifp, br); 2150 } 2151 2152 if (RING_FULL(&txq->ring)) 2153 txq->full = true; 2154 2155 return (0); 2156 } 2157 2158 static int 2159 xn_txq_mq_start(struct ifnet *ifp, struct mbuf *m) 2160 { 2161 struct netfront_info *np; 2162 struct netfront_txq *txq; 2163 int i, npairs, error; 2164 2165 np = ifp->if_softc; 2166 npairs = np->num_queues; 2167 2168 /* check if flowid is set */ 2169 if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) 2170 i = m->m_pkthdr.flowid % npairs; 2171 else 2172 i = curcpu % npairs; 2173 2174 txq = &np->txq[i]; 2175 2176 if (XN_TX_TRYLOCK(txq) != 0) { 2177 error = xn_txq_mq_start_locked(txq, m); 2178 XN_TX_UNLOCK(txq); 2179 } else { 2180 error = drbr_enqueue(ifp, txq->br, m); 2181 taskqueue_enqueue(txq->tq, &txq->defrtask); 2182 } 2183 2184 return (error); 2185 } 2186 2187 static void 2188 xn_qflush(struct ifnet *ifp) 2189 { 2190 struct netfront_info *np; 2191 struct netfront_txq *txq; 2192 struct mbuf *m; 2193 int i; 2194 2195 np = ifp->if_softc; 2196 2197 for (i = 0; i < np->num_queues; i++) { 2198 txq = &np->txq[i]; 2199 2200 XN_TX_LOCK(txq); 2201 while ((m = buf_ring_dequeue_sc(txq->br)) != NULL) 2202 m_freem(m); 2203 XN_TX_UNLOCK(txq); 2204 } 2205 2206 if_qflush(ifp); 2207 } 2208 2209 /** 2210 * Create a network device. 2211 * @param dev Newbus device representing this virtual NIC. 2212 */ 2213 int 2214 create_netdev(device_t dev) 2215 { 2216 struct netfront_info *np; 2217 int err; 2218 struct ifnet *ifp; 2219 2220 np = device_get_softc(dev); 2221 2222 np->xbdev = dev; 2223 2224 mtx_init(&np->sc_lock, "xnsc", "netfront softc lock", MTX_DEF); 2225 2226 ifmedia_init(&np->sc_media, 0, xn_ifmedia_upd, xn_ifmedia_sts); 2227 ifmedia_add(&np->sc_media, IFM_ETHER|IFM_MANUAL, 0, NULL); 2228 ifmedia_set(&np->sc_media, IFM_ETHER|IFM_MANUAL); 2229 2230 np->rx_min_target = RX_MIN_TARGET; 2231 np->rx_max_target = RX_MAX_TARGET; 2232 2233 err = xen_net_read_mac(dev, np->mac); 2234 if (err != 0) 2235 goto error; 2236 2237 /* Set up ifnet structure */ 2238 ifp = np->xn_ifp = if_alloc(IFT_ETHER); 2239 ifp->if_softc = np; 2240 if_initname(ifp, "xn", device_get_unit(dev)); 2241 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; 2242 ifp->if_ioctl = xn_ioctl; 2243 2244 ifp->if_transmit = xn_txq_mq_start; 2245 ifp->if_qflush = xn_qflush; 2246 2247 ifp->if_init = xn_ifinit; 2248 2249 ifp->if_hwassist = XN_CSUM_FEATURES; 2250 ifp->if_capabilities = IFCAP_HWCSUM; 2251 ifp->if_hw_tsomax = 65536 - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN); 2252 ifp->if_hw_tsomaxsegcount = MAX_TX_REQ_FRAGS; 2253 ifp->if_hw_tsomaxsegsize = PAGE_SIZE; 2254 2255 ether_ifattach(ifp, np->mac); 2256 netfront_carrier_off(np); 2257 2258 return (0); 2259 2260 error: 2261 KASSERT(err != 0, ("Error path with no error code specified")); 2262 return (err); 2263 } 2264 2265 static int 2266 netfront_detach(device_t dev) 2267 { 2268 struct netfront_info *info = device_get_softc(dev); 2269 2270 DPRINTK("%s\n", xenbus_get_node(dev)); 2271 2272 netif_free(info); 2273 2274 return 0; 2275 } 2276 2277 static void 2278 netif_free(struct netfront_info *np) 2279 { 2280 2281 XN_LOCK(np); 2282 xn_stop(np); 2283 XN_UNLOCK(np); 2284 netif_disconnect_backend(np); 2285 free(np->rxq, M_DEVBUF); 2286 free(np->txq, M_DEVBUF); 2287 if (np->xn_ifp != NULL) { 2288 ether_ifdetach(np->xn_ifp); 2289 if_free(np->xn_ifp); 2290 np->xn_ifp = NULL; 2291 } 2292 ifmedia_removeall(&np->sc_media); 2293 } 2294 2295 static void 2296 netif_disconnect_backend(struct netfront_info *np) 2297 { 2298 u_int i; 2299 2300 for (i = 0; i < np->num_queues; i++) { 2301 XN_RX_LOCK(&np->rxq[i]); 2302 XN_TX_LOCK(&np->txq[i]); 2303 } 2304 netfront_carrier_off(np); 2305 for (i = 0; i < np->num_queues; i++) { 2306 XN_RX_UNLOCK(&np->rxq[i]); 2307 XN_TX_UNLOCK(&np->txq[i]); 2308 } 2309 2310 for (i = 0; i < np->num_queues; i++) { 2311 disconnect_rxq(&np->rxq[i]); 2312 disconnect_txq(&np->txq[i]); 2313 } 2314 } 2315 2316 static int 2317 xn_ifmedia_upd(struct ifnet *ifp) 2318 { 2319 2320 return (0); 2321 } 2322 2323 static void 2324 xn_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr) 2325 { 2326 2327 ifmr->ifm_status = IFM_AVALID|IFM_ACTIVE; 2328 ifmr->ifm_active = IFM_ETHER|IFM_MANUAL; 2329 } 2330 2331 /* ** Driver registration ** */ 2332 static device_method_t netfront_methods[] = { 2333 /* Device interface */ 2334 DEVMETHOD(device_probe, netfront_probe), 2335 DEVMETHOD(device_attach, netfront_attach), 2336 DEVMETHOD(device_detach, netfront_detach), 2337 DEVMETHOD(device_shutdown, bus_generic_shutdown), 2338 DEVMETHOD(device_suspend, netfront_suspend), 2339 DEVMETHOD(device_resume, netfront_resume), 2340 2341 /* Xenbus interface */ 2342 DEVMETHOD(xenbus_otherend_changed, netfront_backend_changed), 2343 2344 DEVMETHOD_END 2345 }; 2346 2347 static driver_t netfront_driver = { 2348 "xn", 2349 netfront_methods, 2350 sizeof(struct netfront_info), 2351 }; 2352 devclass_t netfront_devclass; 2353 2354 DRIVER_MODULE(xe, xenbusb_front, netfront_driver, netfront_devclass, NULL, 2355 NULL); 2356