1 /*- 2 * Copyright (c) 2004-2006 Kip Macy 3 * Copyright (c) 2015 Wei Liu <wei.liu2@citrix.com> 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 
26 */ 27 28 #include <sys/cdefs.h> 29 __FBSDID("$FreeBSD$"); 30 31 #include "opt_inet.h" 32 #include "opt_inet6.h" 33 34 #include <sys/param.h> 35 #include <sys/sockio.h> 36 #include <sys/limits.h> 37 #include <sys/mbuf.h> 38 #include <sys/malloc.h> 39 #include <sys/module.h> 40 #include <sys/kernel.h> 41 #include <sys/socket.h> 42 #include <sys/sysctl.h> 43 #include <sys/taskqueue.h> 44 45 #include <net/if.h> 46 #include <net/if_var.h> 47 #include <net/if_arp.h> 48 #include <net/ethernet.h> 49 #include <net/if_media.h> 50 #include <net/bpf.h> 51 #include <net/if_types.h> 52 53 #include <netinet/in.h> 54 #include <netinet/ip.h> 55 #include <netinet/if_ether.h> 56 #include <netinet/tcp.h> 57 #include <netinet/tcp_lro.h> 58 59 #include <vm/vm.h> 60 #include <vm/pmap.h> 61 62 #include <sys/bus.h> 63 64 #include <xen/xen-os.h> 65 #include <xen/hypervisor.h> 66 #include <xen/xen_intr.h> 67 #include <xen/gnttab.h> 68 #include <xen/interface/memory.h> 69 #include <xen/interface/io/netif.h> 70 #include <xen/xenbus/xenbusvar.h> 71 72 #include "xenbus_if.h" 73 74 /* Features supported by all backends. TSO and LRO can be negotiated */ 75 #define XN_CSUM_FEATURES (CSUM_TCP | CSUM_UDP) 76 77 #define NET_TX_RING_SIZE __RING_SIZE((netif_tx_sring_t *)0, PAGE_SIZE) 78 #define NET_RX_RING_SIZE __RING_SIZE((netif_rx_sring_t *)0, PAGE_SIZE) 79 80 /* 81 * Should the driver do LRO on the RX end 82 * this can be toggled on the fly, but the 83 * interface must be reset (down/up) for it 84 * to take effect. 85 */ 86 static int xn_enable_lro = 1; 87 TUNABLE_INT("hw.xn.enable_lro", &xn_enable_lro); 88 89 /* 90 * Number of pairs of queues. 91 */ 92 static unsigned long xn_num_queues = 4; 93 TUNABLE_ULONG("hw.xn.num_queues", &xn_num_queues); 94 95 /** 96 * \brief The maximum allowed data fragments in a single transmit 97 * request. 98 * 99 * This limit is imposed by the backend driver. 
We assume here that 100 * we are dealing with a Linux driver domain and have set our limit 101 * to mirror the Linux MAX_SKB_FRAGS constant. 102 */ 103 #define MAX_TX_REQ_FRAGS (65536 / PAGE_SIZE + 2) 104 105 #define RX_COPY_THRESHOLD 256 106 107 #define net_ratelimit() 0 108 109 struct netfront_rxq; 110 struct netfront_txq; 111 struct netfront_info; 112 struct netfront_rx_info; 113 114 static void xn_txeof(struct netfront_txq *); 115 static void xn_rxeof(struct netfront_rxq *); 116 static void xn_alloc_rx_buffers(struct netfront_rxq *); 117 118 static void xn_release_rx_bufs(struct netfront_rxq *); 119 static void xn_release_tx_bufs(struct netfront_txq *); 120 121 static void xn_rxq_intr(void *); 122 static void xn_txq_intr(void *); 123 static int xn_intr(void *); 124 static inline int xn_count_frags(struct mbuf *m); 125 static int xn_assemble_tx_request(struct netfront_txq *, struct mbuf *); 126 static int xn_ioctl(struct ifnet *, u_long, caddr_t); 127 static void xn_ifinit_locked(struct netfront_info *); 128 static void xn_ifinit(void *); 129 static void xn_stop(struct netfront_info *); 130 static void xn_query_features(struct netfront_info *np); 131 static int xn_configure_features(struct netfront_info *np); 132 static void netif_free(struct netfront_info *info); 133 static int netfront_detach(device_t dev); 134 135 static int xn_txq_mq_start_locked(struct netfront_txq *, struct mbuf *); 136 static int xn_txq_mq_start(struct ifnet *, struct mbuf *); 137 138 static int talk_to_backend(device_t dev, struct netfront_info *info); 139 static int create_netdev(device_t dev); 140 static void netif_disconnect_backend(struct netfront_info *info); 141 static int setup_device(device_t dev, struct netfront_info *info, 142 unsigned long); 143 static int xn_ifmedia_upd(struct ifnet *ifp); 144 static void xn_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr); 145 146 int xn_connect(struct netfront_info *); 147 148 static int xn_get_responses(struct netfront_rxq *, 149 
struct netfront_rx_info *, RING_IDX, RING_IDX *, 150 struct mbuf **); 151 152 #define virt_to_mfn(x) (vtophys(x) >> PAGE_SHIFT) 153 154 #define INVALID_P2M_ENTRY (~0UL) 155 156 struct xn_rx_stats 157 { 158 u_long rx_packets; /* total packets received */ 159 u_long rx_bytes; /* total bytes received */ 160 u_long rx_errors; /* bad packets received */ 161 }; 162 163 struct xn_tx_stats 164 { 165 u_long tx_packets; /* total packets transmitted */ 166 u_long tx_bytes; /* total bytes transmitted */ 167 u_long tx_errors; /* packet transmit problems */ 168 }; 169 170 #define XN_QUEUE_NAME_LEN 8 /* xn{t,r}x_%u, allow for two digits */ 171 struct netfront_rxq { 172 struct netfront_info *info; 173 u_int id; 174 char name[XN_QUEUE_NAME_LEN]; 175 struct mtx lock; 176 177 int ring_ref; 178 netif_rx_front_ring_t ring; 179 xen_intr_handle_t xen_intr_handle; 180 181 grant_ref_t gref_head; 182 grant_ref_t grant_ref[NET_TX_RING_SIZE + 1]; 183 184 struct mbuf *mbufs[NET_RX_RING_SIZE + 1]; 185 struct mbufq batch; /* batch queue */ 186 int target; 187 188 xen_pfn_t pfn_array[NET_RX_RING_SIZE]; 189 190 struct lro_ctrl lro; 191 192 struct taskqueue *tq; 193 struct task intrtask; 194 195 struct xn_rx_stats stats; 196 }; 197 198 struct netfront_txq { 199 struct netfront_info *info; 200 u_int id; 201 char name[XN_QUEUE_NAME_LEN]; 202 struct mtx lock; 203 204 int ring_ref; 205 netif_tx_front_ring_t ring; 206 xen_intr_handle_t xen_intr_handle; 207 208 grant_ref_t gref_head; 209 grant_ref_t grant_ref[NET_TX_RING_SIZE + 1]; 210 211 struct mbuf *mbufs[NET_TX_RING_SIZE + 1]; 212 int mbufs_cnt; 213 struct buf_ring *br; 214 215 struct taskqueue *tq; 216 struct task intrtask; 217 struct task defrtask; 218 219 bool full; 220 221 struct xn_tx_stats stats; 222 }; 223 224 struct netfront_info { 225 struct ifnet *xn_ifp; 226 227 struct mtx sc_lock; 228 229 u_int num_queues; 230 struct netfront_rxq *rxq; 231 struct netfront_txq *txq; 232 233 u_int carrier; 234 u_int maxfrags; 235 236 /* Receive-ring batched 
refills. */ 237 #define RX_MIN_TARGET 32 238 #define RX_MAX_TARGET NET_RX_RING_SIZE 239 int rx_min_target; 240 int rx_max_target; 241 242 device_t xbdev; 243 uint8_t mac[ETHER_ADDR_LEN]; 244 245 int xn_if_flags; 246 247 struct ifmedia sc_media; 248 249 bool xn_resume; 250 }; 251 252 struct netfront_rx_info { 253 struct netif_rx_response rx; 254 struct netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1]; 255 }; 256 257 #define XN_RX_LOCK(_q) mtx_lock(&(_q)->lock) 258 #define XN_RX_UNLOCK(_q) mtx_unlock(&(_q)->lock) 259 260 #define XN_TX_LOCK(_q) mtx_lock(&(_q)->lock) 261 #define XN_TX_TRYLOCK(_q) mtx_trylock(&(_q)->lock) 262 #define XN_TX_UNLOCK(_q) mtx_unlock(&(_q)->lock) 263 264 #define XN_LOCK(_sc) mtx_lock(&(_sc)->sc_lock); 265 #define XN_UNLOCK(_sc) mtx_unlock(&(_sc)->sc_lock); 266 267 #define XN_LOCK_ASSERT(_sc) mtx_assert(&(_sc)->sc_lock, MA_OWNED); 268 #define XN_RX_LOCK_ASSERT(_q) mtx_assert(&(_q)->lock, MA_OWNED); 269 #define XN_TX_LOCK_ASSERT(_q) mtx_assert(&(_q)->lock, MA_OWNED); 270 271 #define netfront_carrier_on(netif) ((netif)->carrier = 1) 272 #define netfront_carrier_off(netif) ((netif)->carrier = 0) 273 #define netfront_carrier_ok(netif) ((netif)->carrier) 274 275 /* Access macros for acquiring freeing slots in xn_free_{tx,rx}_idxs[]. 
*/ 276 277 static inline void 278 add_id_to_freelist(struct mbuf **list, uintptr_t id) 279 { 280 281 KASSERT(id != 0, 282 ("%s: the head item (0) must always be free.", __func__)); 283 list[id] = list[0]; 284 list[0] = (struct mbuf *)id; 285 } 286 287 static inline unsigned short 288 get_id_from_freelist(struct mbuf **list) 289 { 290 uintptr_t id; 291 292 id = (uintptr_t)list[0]; 293 KASSERT(id != 0, 294 ("%s: the head item (0) must always remain free.", __func__)); 295 list[0] = list[id]; 296 return (id); 297 } 298 299 static inline int 300 xn_rxidx(RING_IDX idx) 301 { 302 303 return idx & (NET_RX_RING_SIZE - 1); 304 } 305 306 static inline struct mbuf * 307 xn_get_rx_mbuf(struct netfront_rxq *rxq, RING_IDX ri) 308 { 309 int i; 310 struct mbuf *m; 311 312 i = xn_rxidx(ri); 313 m = rxq->mbufs[i]; 314 rxq->mbufs[i] = NULL; 315 return (m); 316 } 317 318 static inline grant_ref_t 319 xn_get_rx_ref(struct netfront_rxq *rxq, RING_IDX ri) 320 { 321 int i = xn_rxidx(ri); 322 grant_ref_t ref = rxq->grant_ref[i]; 323 324 KASSERT(ref != GRANT_REF_INVALID, ("Invalid grant reference!\n")); 325 rxq->grant_ref[i] = GRANT_REF_INVALID; 326 return (ref); 327 } 328 329 #define IPRINTK(fmt, args...) \ 330 printf("[XEN] " fmt, ##args) 331 #ifdef INVARIANTS 332 #define WPRINTK(fmt, args...) \ 333 printf("[XEN] " fmt, ##args) 334 #else 335 #define WPRINTK(fmt, args...) 336 #endif 337 #ifdef DEBUG 338 #define DPRINTK(fmt, args...) \ 339 printf("[XEN] %s: " fmt, __func__, ##args) 340 #else 341 #define DPRINTK(fmt, args...) 342 #endif 343 344 /** 345 * Read the 'mac' node at the given device's node in the store, and parse that 346 * as colon-separated octets, placing result the given mac array. mac must be 347 * a preallocated array of length ETH_ALEN (as declared in linux/if_ether.h). 348 * Return 0 on success, or errno on error. 
349 */ 350 static int 351 xen_net_read_mac(device_t dev, uint8_t mac[]) 352 { 353 int error, i; 354 char *s, *e, *macstr; 355 const char *path; 356 357 path = xenbus_get_node(dev); 358 error = xs_read(XST_NIL, path, "mac", NULL, (void **) &macstr); 359 if (error == ENOENT) { 360 /* 361 * Deal with missing mac XenStore nodes on devices with 362 * HVM emulation (the 'ioemu' configuration attribute) 363 * enabled. 364 * 365 * The HVM emulator may execute in a stub device model 366 * domain which lacks the permission, only given to Dom0, 367 * to update the guest's XenStore tree. For this reason, 368 * the HVM emulator doesn't even attempt to write the 369 * front-side mac node, even when operating in Dom0. 370 * However, there should always be a mac listed in the 371 * backend tree. Fallback to this version if our query 372 * of the front side XenStore location doesn't find 373 * anything. 374 */ 375 path = xenbus_get_otherend_path(dev); 376 error = xs_read(XST_NIL, path, "mac", NULL, (void **) &macstr); 377 } 378 if (error != 0) { 379 xenbus_dev_fatal(dev, error, "parsing %s/mac", path); 380 return (error); 381 } 382 383 s = macstr; 384 for (i = 0; i < ETHER_ADDR_LEN; i++) { 385 mac[i] = strtoul(s, &e, 16); 386 if (s == e || (e[0] != ':' && e[0] != 0)) { 387 free(macstr, M_XENBUS); 388 return (ENOENT); 389 } 390 s = &e[1]; 391 } 392 free(macstr, M_XENBUS); 393 return (0); 394 } 395 396 /** 397 * Entry point to this code when a new device is created. Allocate the basic 398 * structures and the ring buffers for communication with the backend, and 399 * inform the backend of the appropriate details for those. Switch to 400 * Connected state. 
401 */ 402 static int 403 netfront_probe(device_t dev) 404 { 405 406 if (xen_hvm_domain() && xen_disable_pv_nics != 0) 407 return (ENXIO); 408 409 if (!strcmp(xenbus_get_type(dev), "vif")) { 410 device_set_desc(dev, "Virtual Network Interface"); 411 return (0); 412 } 413 414 return (ENXIO); 415 } 416 417 static int 418 netfront_attach(device_t dev) 419 { 420 int err; 421 422 err = create_netdev(dev); 423 if (err != 0) { 424 xenbus_dev_fatal(dev, err, "creating netdev"); 425 return (err); 426 } 427 428 SYSCTL_ADD_INT(device_get_sysctl_ctx(dev), 429 SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), 430 OID_AUTO, "enable_lro", CTLFLAG_RW, 431 &xn_enable_lro, 0, "Large Receive Offload"); 432 433 SYSCTL_ADD_ULONG(device_get_sysctl_ctx(dev), 434 SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), 435 OID_AUTO, "num_queues", CTLFLAG_RD, 436 &xn_num_queues, "Number of pairs of queues"); 437 438 return (0); 439 } 440 441 static int 442 netfront_suspend(device_t dev) 443 { 444 struct netfront_info *np = device_get_softc(dev); 445 u_int i; 446 447 for (i = 0; i < np->num_queues; i++) { 448 XN_RX_LOCK(&np->rxq[i]); 449 XN_TX_LOCK(&np->txq[i]); 450 } 451 netfront_carrier_off(np); 452 for (i = 0; i < np->num_queues; i++) { 453 XN_RX_UNLOCK(&np->rxq[i]); 454 XN_TX_UNLOCK(&np->txq[i]); 455 } 456 return (0); 457 } 458 459 /** 460 * We are reconnecting to the backend, due to a suspend/resume, or a backend 461 * driver restart. We tear down our netif structure and recreate it, but 462 * leave the device-layer structures intact so that this is transparent to the 463 * rest of the kernel. 
464 */ 465 static int 466 netfront_resume(device_t dev) 467 { 468 struct netfront_info *info = device_get_softc(dev); 469 470 info->xn_resume = true; 471 netif_disconnect_backend(info); 472 return (0); 473 } 474 475 static int 476 write_queue_xenstore_keys(device_t dev, 477 struct netfront_rxq *rxq, 478 struct netfront_txq *txq, 479 struct xs_transaction *xst, bool hierarchy) 480 { 481 int err; 482 const char *message; 483 const char *node = xenbus_get_node(dev); 484 char *path; 485 size_t path_size; 486 487 KASSERT(rxq->id == txq->id, ("Mismatch between RX and TX queue ids")); 488 /* Split event channel support is not yet there. */ 489 KASSERT(rxq->xen_intr_handle == txq->xen_intr_handle, 490 ("Split event channels are not supported")); 491 492 if (hierarchy) { 493 path_size = strlen(node) + 10; 494 path = malloc(path_size, M_DEVBUF, M_WAITOK|M_ZERO); 495 snprintf(path, path_size, "%s/queue-%u", node, rxq->id); 496 } else { 497 path_size = strlen(node) + 1; 498 path = malloc(path_size, M_DEVBUF, M_WAITOK|M_ZERO); 499 snprintf(path, path_size, "%s", node); 500 } 501 502 err = xs_printf(*xst, path, "tx-ring-ref","%u", txq->ring_ref); 503 if (err != 0) { 504 message = "writing tx ring-ref"; 505 goto error; 506 } 507 err = xs_printf(*xst, path, "rx-ring-ref","%u", rxq->ring_ref); 508 if (err != 0) { 509 message = "writing rx ring-ref"; 510 goto error; 511 } 512 err = xs_printf(*xst, path, "event-channel", "%u", 513 xen_intr_port(rxq->xen_intr_handle)); 514 if (err != 0) { 515 message = "writing event-channel"; 516 goto error; 517 } 518 519 free(path, M_DEVBUF); 520 521 return (0); 522 523 error: 524 free(path, M_DEVBUF); 525 xenbus_dev_fatal(dev, err, "%s", message); 526 527 return (err); 528 } 529 530 /* Common code used when first setting up, and when resuming. 
*/ 531 static int 532 talk_to_backend(device_t dev, struct netfront_info *info) 533 { 534 const char *message; 535 struct xs_transaction xst; 536 const char *node = xenbus_get_node(dev); 537 int err; 538 unsigned long num_queues, max_queues = 0; 539 unsigned int i; 540 541 err = xen_net_read_mac(dev, info->mac); 542 if (err != 0) { 543 xenbus_dev_fatal(dev, err, "parsing %s/mac", node); 544 goto out; 545 } 546 547 err = xs_scanf(XST_NIL, xenbus_get_otherend_path(info->xbdev), 548 "multi-queue-max-queues", NULL, "%lu", &max_queues); 549 if (err != 0) 550 max_queues = 1; 551 num_queues = xn_num_queues; 552 if (num_queues > max_queues) 553 num_queues = max_queues; 554 555 err = setup_device(dev, info, num_queues); 556 if (err != 0) 557 goto out; 558 559 again: 560 err = xs_transaction_start(&xst); 561 if (err != 0) { 562 xenbus_dev_fatal(dev, err, "starting transaction"); 563 goto free; 564 } 565 566 if (info->num_queues == 1) { 567 err = write_queue_xenstore_keys(dev, &info->rxq[0], 568 &info->txq[0], &xst, false); 569 if (err != 0) 570 goto abort_transaction_no_def_error; 571 } else { 572 err = xs_printf(xst, node, "multi-queue-num-queues", 573 "%u", info->num_queues); 574 if (err != 0) { 575 message = "writing multi-queue-num-queues"; 576 goto abort_transaction; 577 } 578 579 for (i = 0; i < info->num_queues; i++) { 580 err = write_queue_xenstore_keys(dev, &info->rxq[i], 581 &info->txq[i], &xst, true); 582 if (err != 0) 583 goto abort_transaction_no_def_error; 584 } 585 } 586 587 err = xs_printf(xst, node, "request-rx-copy", "%u", 1); 588 if (err != 0) { 589 message = "writing request-rx-copy"; 590 goto abort_transaction; 591 } 592 err = xs_printf(xst, node, "feature-rx-notify", "%d", 1); 593 if (err != 0) { 594 message = "writing feature-rx-notify"; 595 goto abort_transaction; 596 } 597 err = xs_printf(xst, node, "feature-sg", "%d", 1); 598 if (err != 0) { 599 message = "writing feature-sg"; 600 goto abort_transaction; 601 } 602 err = xs_printf(xst, node, 
"feature-gso-tcpv4", "%d", 1); 603 if (err != 0) { 604 message = "writing feature-gso-tcpv4"; 605 goto abort_transaction; 606 } 607 608 err = xs_transaction_end(xst, 0); 609 if (err != 0) { 610 if (err == EAGAIN) 611 goto again; 612 xenbus_dev_fatal(dev, err, "completing transaction"); 613 goto free; 614 } 615 616 return 0; 617 618 abort_transaction: 619 xenbus_dev_fatal(dev, err, "%s", message); 620 abort_transaction_no_def_error: 621 xs_transaction_end(xst, 1); 622 free: 623 netif_free(info); 624 out: 625 return (err); 626 } 627 628 static void 629 xn_rxq_tq_intr(void *xrxq, int pending) 630 { 631 struct netfront_rxq *rxq = xrxq; 632 633 XN_RX_LOCK(rxq); 634 xn_rxeof(rxq); 635 XN_RX_UNLOCK(rxq); 636 } 637 638 static void 639 xn_txq_start(struct netfront_txq *txq) 640 { 641 struct netfront_info *np = txq->info; 642 struct ifnet *ifp = np->xn_ifp; 643 644 XN_TX_LOCK_ASSERT(txq); 645 if (!drbr_empty(ifp, txq->br)) 646 xn_txq_mq_start_locked(txq, NULL); 647 } 648 649 static void 650 xn_txq_tq_intr(void *xtxq, int pending) 651 { 652 struct netfront_txq *txq = xtxq; 653 654 XN_TX_LOCK(txq); 655 if (RING_HAS_UNCONSUMED_RESPONSES(&txq->ring)) 656 xn_txeof(txq); 657 xn_txq_start(txq); 658 XN_TX_UNLOCK(txq); 659 } 660 661 static void 662 xn_txq_tq_deferred(void *xtxq, int pending) 663 { 664 struct netfront_txq *txq = xtxq; 665 666 XN_TX_LOCK(txq); 667 xn_txq_start(txq); 668 XN_TX_UNLOCK(txq); 669 } 670 671 static void 672 disconnect_rxq(struct netfront_rxq *rxq) 673 { 674 675 xn_release_rx_bufs(rxq); 676 gnttab_free_grant_references(rxq->gref_head); 677 gnttab_end_foreign_access_ref(rxq->ring_ref); 678 /* 679 * No split event channel support at the moment, handle will 680 * be unbound in tx. So no need to call xen_intr_unbind here, 681 * but we do want to reset the handler to 0. 
682 */ 683 rxq->xen_intr_handle = 0; 684 } 685 686 static void 687 destroy_rxq(struct netfront_rxq *rxq) 688 { 689 690 free(rxq->ring.sring, M_DEVBUF); 691 taskqueue_drain_all(rxq->tq); 692 taskqueue_free(rxq->tq); 693 } 694 695 static void 696 destroy_rxqs(struct netfront_info *np) 697 { 698 int i; 699 700 for (i = 0; i < np->num_queues; i++) 701 destroy_rxq(&np->rxq[i]); 702 703 free(np->rxq, M_DEVBUF); 704 np->rxq = NULL; 705 } 706 707 static int 708 setup_rxqs(device_t dev, struct netfront_info *info, 709 unsigned long num_queues) 710 { 711 int q, i; 712 int error; 713 netif_rx_sring_t *rxs; 714 struct netfront_rxq *rxq; 715 716 info->rxq = malloc(sizeof(struct netfront_rxq) * num_queues, 717 M_DEVBUF, M_WAITOK|M_ZERO); 718 719 for (q = 0; q < num_queues; q++) { 720 rxq = &info->rxq[q]; 721 722 rxq->id = q; 723 rxq->info = info; 724 rxq->target = RX_MIN_TARGET; 725 rxq->ring_ref = GRANT_REF_INVALID; 726 rxq->ring.sring = NULL; 727 snprintf(rxq->name, XN_QUEUE_NAME_LEN, "xnrx_%u", q); 728 mtx_init(&rxq->lock, rxq->name, "netfront receive lock", 729 MTX_DEF); 730 731 for (i = 0; i <= NET_RX_RING_SIZE; i++) { 732 rxq->mbufs[i] = NULL; 733 rxq->grant_ref[i] = GRANT_REF_INVALID; 734 } 735 736 mbufq_init(&rxq->batch, INT_MAX); 737 738 /* Start resources allocation */ 739 740 if (gnttab_alloc_grant_references(RX_MAX_TARGET, 741 &rxq->gref_head) != 0) { 742 device_printf(dev, "allocating rx gref"); 743 error = ENOMEM; 744 goto fail; 745 } 746 747 rxs = (netif_rx_sring_t *)malloc(PAGE_SIZE, M_DEVBUF, 748 M_WAITOK|M_ZERO); 749 SHARED_RING_INIT(rxs); 750 FRONT_RING_INIT(&rxq->ring, rxs, PAGE_SIZE); 751 752 error = xenbus_grant_ring(dev, virt_to_mfn(rxs), 753 &rxq->ring_ref); 754 if (error != 0) { 755 device_printf(dev, "granting rx ring page"); 756 goto fail_grant_ring; 757 } 758 759 TASK_INIT(&rxq->intrtask, 0, xn_rxq_tq_intr, rxq); 760 rxq->tq = taskqueue_create_fast(rxq->name, M_WAITOK, 761 taskqueue_thread_enqueue, &rxq->tq); 762 763 error = 
taskqueue_start_threads(&rxq->tq, 1, PI_NET, 764 "%s rxq %d", device_get_nameunit(dev), rxq->id); 765 if (error != 0) { 766 device_printf(dev, "failed to start rx taskq %d\n", 767 rxq->id); 768 goto fail_start_thread; 769 } 770 } 771 772 return (0); 773 774 fail_start_thread: 775 gnttab_end_foreign_access_ref(rxq->ring_ref); 776 taskqueue_drain_all(rxq->tq); 777 taskqueue_free(rxq->tq); 778 fail_grant_ring: 779 gnttab_free_grant_references(rxq->gref_head); 780 free(rxq->ring.sring, M_DEVBUF); 781 fail: 782 for (; q >= 0; q--) { 783 disconnect_rxq(&info->rxq[q]); 784 destroy_rxq(&info->rxq[q]); 785 } 786 787 free(info->rxq, M_DEVBUF); 788 return (error); 789 } 790 791 static void 792 disconnect_txq(struct netfront_txq *txq) 793 { 794 795 xn_release_tx_bufs(txq); 796 gnttab_free_grant_references(txq->gref_head); 797 gnttab_end_foreign_access_ref(txq->ring_ref); 798 xen_intr_unbind(&txq->xen_intr_handle); 799 } 800 801 static void 802 destroy_txq(struct netfront_txq *txq) 803 { 804 805 free(txq->ring.sring, M_DEVBUF); 806 buf_ring_free(txq->br, M_DEVBUF); 807 taskqueue_drain_all(txq->tq); 808 taskqueue_free(txq->tq); 809 } 810 811 static void 812 destroy_txqs(struct netfront_info *np) 813 { 814 int i; 815 816 for (i = 0; i < np->num_queues; i++) 817 destroy_txq(&np->txq[i]); 818 819 free(np->txq, M_DEVBUF); 820 np->txq = NULL; 821 } 822 823 static int 824 setup_txqs(device_t dev, struct netfront_info *info, 825 unsigned long num_queues) 826 { 827 int q, i; 828 int error; 829 netif_tx_sring_t *txs; 830 struct netfront_txq *txq; 831 832 info->txq = malloc(sizeof(struct netfront_txq) * num_queues, 833 M_DEVBUF, M_WAITOK|M_ZERO); 834 835 for (q = 0; q < num_queues; q++) { 836 txq = &info->txq[q]; 837 838 txq->id = q; 839 txq->info = info; 840 841 txq->ring_ref = GRANT_REF_INVALID; 842 txq->ring.sring = NULL; 843 844 snprintf(txq->name, XN_QUEUE_NAME_LEN, "xntx_%u", q); 845 846 mtx_init(&txq->lock, txq->name, "netfront transmit lock", 847 MTX_DEF); 848 849 for (i = 0; i <= 
NET_TX_RING_SIZE; i++) { 850 txq->mbufs[i] = (void *) ((u_long) i+1); 851 txq->grant_ref[i] = GRANT_REF_INVALID; 852 } 853 txq->mbufs[NET_TX_RING_SIZE] = (void *)0; 854 855 /* Start resources allocation. */ 856 857 if (gnttab_alloc_grant_references(NET_TX_RING_SIZE, 858 &txq->gref_head) != 0) { 859 device_printf(dev, "failed to allocate tx grant refs\n"); 860 error = ENOMEM; 861 goto fail; 862 } 863 864 txs = (netif_tx_sring_t *)malloc(PAGE_SIZE, M_DEVBUF, 865 M_WAITOK|M_ZERO); 866 SHARED_RING_INIT(txs); 867 FRONT_RING_INIT(&txq->ring, txs, PAGE_SIZE); 868 869 error = xenbus_grant_ring(dev, virt_to_mfn(txs), 870 &txq->ring_ref); 871 if (error != 0) { 872 device_printf(dev, "failed to grant tx ring\n"); 873 goto fail_grant_ring; 874 } 875 876 txq->br = buf_ring_alloc(NET_TX_RING_SIZE, M_DEVBUF, 877 M_WAITOK, &txq->lock); 878 TASK_INIT(&txq->defrtask, 0, xn_txq_tq_deferred, txq); 879 TASK_INIT(&txq->intrtask, 0, xn_txq_tq_intr, txq); 880 881 txq->tq = taskqueue_create_fast(txq->name, M_WAITOK, 882 taskqueue_thread_enqueue, &txq->tq); 883 884 error = taskqueue_start_threads(&txq->tq, 1, PI_NET, 885 "%s txq %d", device_get_nameunit(dev), txq->id); 886 if (error != 0) { 887 device_printf(dev, "failed to start tx taskq %d\n", 888 txq->id); 889 goto fail_start_thread; 890 } 891 892 error = xen_intr_alloc_and_bind_local_port(dev, 893 xenbus_get_otherend_id(dev), xn_intr, /* handler */ NULL, 894 &info->txq[q], 895 INTR_TYPE_NET | INTR_MPSAFE | INTR_ENTROPY, 896 &txq->xen_intr_handle); 897 898 if (error != 0) { 899 device_printf(dev, "xen_intr_alloc_and_bind_local_port failed\n"); 900 goto fail_bind_port; 901 } 902 } 903 904 return (0); 905 906 fail_bind_port: 907 taskqueue_drain_all(txq->tq); 908 fail_start_thread: 909 buf_ring_free(txq->br, M_DEVBUF); 910 taskqueue_free(txq->tq); 911 gnttab_end_foreign_access_ref(txq->ring_ref); 912 fail_grant_ring: 913 gnttab_free_grant_references(txq->gref_head); 914 free(txq->ring.sring, M_DEVBUF); 915 fail: 916 for (; q >= 0; q--) { 
917 disconnect_txq(&info->txq[q]); 918 destroy_txq(&info->txq[q]); 919 } 920 921 free(info->txq, M_DEVBUF); 922 return (error); 923 } 924 925 static int 926 setup_device(device_t dev, struct netfront_info *info, 927 unsigned long num_queues) 928 { 929 int error; 930 int q; 931 932 if (info->txq) 933 destroy_txqs(info); 934 935 if (info->rxq) 936 destroy_rxqs(info); 937 938 info->num_queues = 0; 939 940 error = setup_rxqs(dev, info, num_queues); 941 if (error != 0) 942 goto out; 943 error = setup_txqs(dev, info, num_queues); 944 if (error != 0) 945 goto out; 946 947 info->num_queues = num_queues; 948 949 /* No split event channel at the moment. */ 950 for (q = 0; q < num_queues; q++) 951 info->rxq[q].xen_intr_handle = info->txq[q].xen_intr_handle; 952 953 return (0); 954 955 out: 956 KASSERT(error != 0, ("Error path taken without providing an error code")); 957 return (error); 958 } 959 960 #ifdef INET 961 /** 962 * If this interface has an ipv4 address, send an arp for it. This 963 * helps to get the network going again after migrating hosts. 964 */ 965 static void 966 netfront_send_fake_arp(device_t dev, struct netfront_info *info) 967 { 968 struct ifnet *ifp; 969 struct ifaddr *ifa; 970 971 ifp = info->xn_ifp; 972 TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { 973 if (ifa->ifa_addr->sa_family == AF_INET) { 974 arp_ifinit(ifp, ifa); 975 } 976 } 977 } 978 #endif 979 980 /** 981 * Callback received when the backend's state changes. 
982 */ 983 static void 984 netfront_backend_changed(device_t dev, XenbusState newstate) 985 { 986 struct netfront_info *sc = device_get_softc(dev); 987 988 DPRINTK("newstate=%d\n", newstate); 989 990 switch (newstate) { 991 case XenbusStateInitialising: 992 case XenbusStateInitialised: 993 case XenbusStateUnknown: 994 case XenbusStateClosed: 995 case XenbusStateReconfigured: 996 case XenbusStateReconfiguring: 997 break; 998 case XenbusStateInitWait: 999 if (xenbus_get_state(dev) != XenbusStateInitialising) 1000 break; 1001 if (xn_connect(sc) != 0) 1002 break; 1003 xenbus_set_state(dev, XenbusStateConnected); 1004 break; 1005 case XenbusStateClosing: 1006 xenbus_set_state(dev, XenbusStateClosed); 1007 break; 1008 case XenbusStateConnected: 1009 #ifdef INET 1010 netfront_send_fake_arp(dev, sc); 1011 #endif 1012 break; 1013 } 1014 } 1015 1016 /** 1017 * \brief Verify that there is sufficient space in the Tx ring 1018 * buffer for a maximally sized request to be enqueued. 1019 * 1020 * A transmit request requires a transmit descriptor for each packet 1021 * fragment, plus up to 2 entries for "options" (e.g. TSO). 1022 */ 1023 static inline int 1024 xn_tx_slot_available(struct netfront_txq *txq) 1025 { 1026 1027 return (RING_FREE_REQUESTS(&txq->ring) > (MAX_TX_REQ_FRAGS + 2)); 1028 } 1029 1030 static void 1031 xn_release_tx_bufs(struct netfront_txq *txq) 1032 { 1033 int i; 1034 1035 for (i = 1; i <= NET_TX_RING_SIZE; i++) { 1036 struct mbuf *m; 1037 1038 m = txq->mbufs[i]; 1039 1040 /* 1041 * We assume that no kernel addresses are 1042 * less than NET_TX_RING_SIZE. Any entry 1043 * in the table that is below this number 1044 * must be an index from free-list tracking. 
		 */
		if (((uintptr_t)m) <= NET_TX_RING_SIZE)
			continue;
		gnttab_end_foreign_access_ref(txq->grant_ref[i]);
		gnttab_release_grant_reference(&txq->gref_head,
		    txq->grant_ref[i]);
		txq->grant_ref[i] = GRANT_REF_INVALID;
		add_id_to_freelist(txq->mbufs, i);
		txq->mbufs_cnt--;
		if (txq->mbufs_cnt < 0) {
			panic("%s: tx_chain_cnt must be >= 0", __func__);
		}
		m_free(m);
	}
}

/**
 * \brief Refill the RX ring of a queue with freshly allocated buffers.
 *
 * Allocates page-sized jumbo cluster mbufs into rxq->batch, and once
 * enough have accumulated grants each one to the backend, fills in the
 * matching ring requests and pushes them, signalling the backend's event
 * channel when the ring macro asks for a notification.
 *
 * Does nothing while the carrier is off.
 *
 * \param rxq  Receive queue to refill.
 */
static void
xn_alloc_rx_buffers(struct netfront_rxq *rxq)
{
	struct netfront_info *np = rxq->info;
	int otherend_id = xenbus_get_otherend_id(np->xbdev);
	unsigned short id;
	struct mbuf *m_new;
	int i, batch_target, notify;
	RING_IDX req_prod;
	grant_ref_t ref;
	netif_rx_request_t *req;
	vm_offset_t vaddr;
	u_long pfn;

	req_prod = rxq->ring.req_prod_pvt;

	if (__predict_false(np->carrier == 0))
		return;

	/*
	 * Allocate mbufs greedily, even though we batch updates to the
	 * receive ring. This creates a less bursty demand on the memory
	 * allocator, and so should reduce the chance of failed allocation
	 * requests both for ourself and for other kernel subsystems.
	 *
	 * Here we attempt to maintain rx_target buffers in flight, counting
	 * buffers that we have yet to process in the receive ring.
	 */
	batch_target = rxq->target - (req_prod - rxq->ring.rsp_cons);
	for (i = mbufq_len(&rxq->batch); i < batch_target; i++) {
		m_new = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, MJUMPAGESIZE);
		if (m_new == NULL) {
			/*
			 * Allocation failed: if the batch already holds
			 * something, submit what we have; otherwise give up.
			 */
			if (i != 0)
				goto refill;
			/* XXX set timer */
			break;
		}
		m_new->m_len = m_new->m_pkthdr.len = MJUMPAGESIZE;

		/* queue the mbufs allocated */
		mbufq_enqueue(&rxq->batch, m_new);
	}

	/*
	 * If we've allocated at least half of our target number of entries,
	 * submit them to the backend - we have enough to make the overhead
	 * of submission worthwhile.  Otherwise wait for more mbufs and
	 * request entries to become available.
	 */
	if (i < (rxq->target/2)) {
		/* Still publish requests that were queued privately earlier. */
		if (req_prod > rxq->ring.sring->req_prod)
			goto push;
		return;
	}

	/*
	 * Double floating fill target if we risked having the backend
	 * run out of empty buffers for receive traffic.  We define "running
	 * low" as having less than a fourth of our target buffers free
	 * at the time we refilled the queue.
	 */
	if ((req_prod - rxq->ring.sring->rsp_prod) < (rxq->target / 4)) {
		rxq->target *= 2;
		if (rxq->target > np->rx_max_target)
			rxq->target = np->rx_max_target;
	}

refill:
	/* Drain the batch queue, posting one ring request per mbuf. */
	for (i = 0; ; i++) {
		if ((m_new = mbufq_dequeue(&rxq->batch)) == NULL)
			break;

		/* Stash the buffer's page frame number in the mbuf itself. */
		m_new->m_ext.ext_arg1 = (vm_paddr_t *)(uintptr_t)(
		    vtophys(m_new->m_ext.ext_buf) >> PAGE_SHIFT);

		id = xn_rxidx(req_prod + i);

		KASSERT(rxq->mbufs[id] == NULL, ("non-NULL xn_rx_chain"));
		rxq->mbufs[id] = m_new;

		ref = gnttab_claim_grant_reference(&rxq->gref_head);
		KASSERT(ref != GNTTAB_LIST_END,
		    ("reserved grant references exhuasted"));
		rxq->grant_ref[id] = ref;

		vaddr = mtod(m_new, vm_offset_t);
		pfn = vtophys(vaddr) >> PAGE_SHIFT;
		req = RING_GET_REQUEST(&rxq->ring, req_prod + i);

		/* Last argument 0: the grant is not marked read-only. */
		gnttab_grant_foreign_access_ref(ref, otherend_id, pfn, 0);
		req->id = id;
		req->gref = ref;

		rxq->pfn_array[i] =
		    vtophys(mtod(m_new,vm_offset_t)) >> PAGE_SHIFT;
	}

	KASSERT(i, ("no mbufs processed")); /* should have returned earlier */
	KASSERT(mbufq_len(&rxq->batch) == 0, ("not all mbufs processed"));
	/*
	 * We may have allocated buffers which have entries outstanding
	 * in the page update queue -- make sure we flush those first!
	 */
	wmb();

	/* Above is a suitable barrier to ensure backend will see requests. */
	rxq->ring.req_prod_pvt = req_prod + i;
push:
	RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&rxq->ring, notify);
	if (notify)
		xen_intr_signal(rxq->xen_intr_handle);
}

/**
 * \brief Revoke and free every RX buffer currently posted on a queue.
 *
 * For each slot that holds an mbuf with a valid grant reference, ends
 * the foreign access, releases the reference, clears the slot and frees
 * the mbuf chain.
 *
 * NOTE(review): a slot holding an mbuf whose grant reference is
 * GRANT_REF_INVALID is skipped without freeing the mbuf -- confirm that
 * this state cannot occur or is handled elsewhere.
 *
 * \param rxq  Receive queue to clean up.
 */
static void
xn_release_rx_bufs(struct netfront_rxq *rxq)
{
	int i,  ref;
	struct mbuf *m;

	for (i = 0; i < NET_RX_RING_SIZE; i++) {
		m = rxq->mbufs[i];

		if (m == NULL)
			continue;

		ref = rxq->grant_ref[i];
		if (ref == GRANT_REF_INVALID)
			continue;

		gnttab_end_foreign_access_ref(ref);
		gnttab_release_grant_reference(&rxq->gref_head, ref);
		rxq->mbufs[i] = NULL;
		rxq->grant_ref[i] = GRANT_REF_INVALID;
		m_freem(m);
	}
}

/**
 * \brief Process completed RX responses for a queue.
 *
 * Collects every response up to the backend's producer index into a
 * local queue, hands the resulting packets to LRO or directly to the
 * interface input routine, flushes LRO, refills the ring, and repeats
 * while the final ring check reports more work.
 *
 * Must be entered with the queue's RX lock held (asserted); the lock is
 * dropped around each hand-off to the stack and re-taken afterwards.
 *
 * \param rxq  Receive queue to service.
 */
static void
xn_rxeof(struct netfront_rxq *rxq)
{
	struct ifnet *ifp;
	struct netfront_info *np = rxq->info;
#if (defined(INET) || defined(INET6))
	struct lro_ctrl *lro = &rxq->lro;
	struct lro_entry *queued;
#endif
	struct netfront_rx_info rinfo;
	struct netif_rx_response *rx = &rinfo.rx;
	struct netif_extra_info *extras = rinfo.extras;
	RING_IDX i, rp;
	struct mbuf *m;
	struct mbufq mbufq_rxq, mbufq_errq;
	int err, work_to_do;

	do {
		XN_RX_LOCK_ASSERT(rxq);
		if (!netfront_carrier_ok(np))
			return;

		/* XXX: there should be some sane limit. */
		mbufq_init(&mbufq_errq, INT_MAX);
		mbufq_init(&mbufq_rxq, INT_MAX);

		ifp = np->xn_ifp;

		rp = rxq->ring.sring->rsp_prod;
		rmb();	/* Ensure we see queued responses up to 'rp'. */

		i = rxq->ring.rsp_cons;
		while ((i != rp)) {
			/* Work on a private copy of the response. */
			memcpy(rx, RING_GET_RESPONSE(&rxq->ring, i), sizeof(*rx));
			memset(extras, 0, sizeof(rinfo.extras));

			m = NULL;
			/* Advances 'i' past every slot the packet consumed. */
			err = xn_get_responses(rxq, &rinfo, rp, &i, &m);

			if (__predict_false(err)) {
				/* Park bad packets; they are freed below. */
				if (m)
					(void )mbufq_enqueue(&mbufq_errq, m);
				rxq->stats.rx_errors++;
				continue;
			}

			m->m_pkthdr.rcvif = ifp;
			if ( rx->flags & NETRXF_data_validated ) {
				/* Tell the stack the checksums are okay */
				/*
				 * XXX this isn't necessarily the case - need to add
				 * check
				 */

				m->m_pkthdr.csum_flags |=
					(CSUM_IP_CHECKED | CSUM_IP_VALID | CSUM_DATA_VALID
					    | CSUM_PSEUDO_HDR);
				m->m_pkthdr.csum_data = 0xffff;
			}

			rxq->stats.rx_packets++;
			rxq->stats.rx_bytes += m->m_pkthdr.len;

			(void )mbufq_enqueue(&mbufq_rxq, m);
			rxq->ring.rsp_cons = i;
		}

		mbufq_drain(&mbufq_errq);

		/*
		 * Process all the mbufs after the remapping is complete.
		 * Break the mbuf chain first though.
		 */
		while ((m = mbufq_dequeue(&mbufq_rxq)) != NULL) {
			if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);

			/* XXX: Do we really need to drop the rx lock? */
			XN_RX_UNLOCK(rxq);
#if (defined(INET) || defined(INET6))
			/* Use LRO if possible */
			if ((ifp->if_capenable & IFCAP_LRO) == 0 ||
			    lro->lro_cnt == 0 || tcp_lro_rx(lro, m, 0)) {
				/*
				 * If LRO fails, pass up to the stack
				 * directly.
				 */
				(*ifp->if_input)(ifp, m);
			}
#else
			(*ifp->if_input)(ifp, m);
#endif

			XN_RX_LOCK(rxq);
		}

		/* Also covers slots consumed by packets that failed above. */
		rxq->ring.rsp_cons = i;

#if (defined(INET) || defined(INET6))
		/*
		 * Flush any outstanding LRO work
		 */
		while (!SLIST_EMPTY(&lro->lro_active)) {
			queued = SLIST_FIRST(&lro->lro_active);
			SLIST_REMOVE_HEAD(&lro->lro_active, next);
			tcp_lro_flush(lro, queued);
		}
#endif

		xn_alloc_rx_buffers(rxq);

		RING_FINAL_CHECK_FOR_RESPONSES(&rxq->ring, work_to_do);
	} while (work_to_do);
}

/**
 * \brief Reclaim TX slots whose requests the backend has completed.
 *
 * For every new TX response: revokes the grant, returns the slot id to
 * the free list, frees the mbuf and clears IFF_DRV_OACTIVE.  Afterwards
 * re-arms the response event and, if the queue had been marked full and
 * ring space is available again, schedules the queue's interrupt task.
 *
 * Must be entered with the queue's TX lock held (asserted).  Does
 * nothing while the carrier is off.
 *
 * \param txq  Transmit queue to service.
 */
static void
xn_txeof(struct netfront_txq *txq)
{
	RING_IDX i, prod;
	unsigned short id;
	struct ifnet *ifp;
	netif_tx_response_t *txr;
	struct mbuf *m;
	struct netfront_info *np = txq->info;

	XN_TX_LOCK_ASSERT(txq);

	if (!netfront_carrier_ok(np))
		return;

	ifp = np->xn_ifp;

	do {
		prod = txq->ring.sring->rsp_prod;
		rmb(); /* Ensure we see responses up to 'prod'. */

		for (i = txq->ring.rsp_cons; i != prod; i++) {
			txr = RING_GET_RESPONSE(&txq->ring, i);
			/* NETIF_RSP_NULL responses carry nothing to reclaim. */
			if (txr->status == NETIF_RSP_NULL)
				continue;

			if (txr->status != NETIF_RSP_OKAY) {
				printf("%s: WARNING: response is %d!\n",
				       __func__, txr->status);
			}
			id = txr->id;
			m = txq->mbufs[id];
			KASSERT(m != NULL, ("mbuf not found in chain"));
			/*
			 * Values up to NET_TX_RING_SIZE stored in mbufs[] are
			 * free-list indices rather than mbuf pointers.
			 */
			KASSERT((uintptr_t)m > NET_TX_RING_SIZE,
				("mbuf already on the free list, but we're "
				"trying to free it again!"));
			M_ASSERTVALID(m);

			/*
			 * Increment packet count if this is the last
			 * mbuf of the chain.
			 */
			if (!m->m_next)
				if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
			if (__predict_false(gnttab_query_foreign_access(
			    txq->grant_ref[id]) != 0)) {
				panic("%s: grant id %u still in use by the "
				    "backend", __func__, id);
			}
			gnttab_end_foreign_access_ref(txq->grant_ref[id]);
			gnttab_release_grant_reference(
				&txq->gref_head, txq->grant_ref[id]);
			txq->grant_ref[id] = GRANT_REF_INVALID;

			txq->mbufs[id] = NULL;
			add_id_to_freelist(txq->mbufs, id);
			txq->mbufs_cnt--;
			/* m_free(): only this mbuf, not the rest of its chain. */
			m_free(m);
			/* Only mark the txq active if we've freed up at least one slot to try */
			ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
		}
		txq->ring.rsp_cons = prod;

		/*
		 * Set a new event, then check for race with update of
		 * tx_cons. Note that it is essential to schedule a
		 * callback, no matter how few buffers are pending. Even if
		 * there is space in the transmit ring, higher layers may
		 * be blocked because too much data is outstanding: in such
		 * cases notification from Xen is likely to be the only kick
		 * that we'll get.
		 */
		txq->ring.sring->rsp_event =
		    prod + ((txq->ring.sring->req_prod - prod) >> 1) + 1;

		mb();
	} while (prod != txq->ring.sring->rsp_prod);

	if (txq->full &&
	    ((txq->ring.sring->req_prod - prod) < NET_TX_RING_SIZE)) {
		txq->full = false;
		taskqueue_enqueue(txq->tq, &txq->intrtask);
	}
}


/* Schedule the RX queue's interrupt task on its taskqueue. */
static void
xn_rxq_intr(void *xrxq)
{
	struct netfront_rxq *rxq = xrxq;

	taskqueue_enqueue(rxq->tq, &rxq->intrtask);
}

/* Schedule the TX queue's interrupt task on its taskqueue. */
static void
xn_txq_intr(void *xtxq)
{
	struct netfront_txq *txq = xtxq;

	taskqueue_enqueue(txq->tq, &txq->intrtask);
}

/*
 * Interrupt filter.  The argument is a TX queue; the RX queue with the
 * same index is looked up through the softc and both are kicked.
 * Always returns FILTER_HANDLED.
 */
static int
xn_intr(void *xsc)
{
	struct netfront_txq *txq = xsc;
	struct netfront_info *np = txq->info;
	struct netfront_rxq *rxq = &np->rxq[txq->id];

	/* kick both tx and rx */
	xn_rxq_intr(rxq);
	xn_txq_intr(txq);

	return (FILTER_HANDLED);
}

/*
 * Re-post an mbuf and its grant reference at the ring's private request
 * producer slot, then advance that producer index.  The request is not
 * pushed to the backend here.
 */
static void
xn_move_rx_slot(struct netfront_rxq *rxq, struct mbuf *m,
    grant_ref_t ref)
{
	int new = xn_rxidx(rxq->ring.req_prod_pvt);

	KASSERT(rxq->mbufs[new] == NULL, ("mbufs != NULL"));
	rxq->mbufs[new] = m;
	rxq->grant_ref[new] = ref;
	RING_GET_REQUEST(&rxq->ring, rxq->ring.req_prod_pvt)->id = new;
	RING_GET_REQUEST(&rxq->ring, rxq->ring.req_prod_pvt)->gref = ref;
	rxq->ring.req_prod_pvt++;
}

/**
 * \brief Consume the extra-info slots that follow an RX response.
 *
 * Walks the ring from *cons while XEN_NETIF_EXTRA_FLAG_MORE is set,
 * copying each entry with a valid type into extras[type - 1].  The
 * buffer attached to each consumed slot is re-posted to the ring.
 *
 * \param rxq     Receive queue being processed.
 * \param extras  Output array indexed by (extra type - 1).
 * \param rp      Response producer index bounding the walk.
 * \param cons    In/out consumer index; advanced once per extra slot.
 *
 * \return  0 on success; EINVAL if the ring ran out of responses or an
 *          entry carried an out-of-range type.
 */
static int
xn_get_extras(struct netfront_rxq *rxq,
    struct netif_extra_info *extras, RING_IDX rp, RING_IDX *cons)
{
	struct netif_extra_info *extra;

	int err = 0;

	do {
		struct mbuf *m;
		grant_ref_t ref;

		/* The next slot would be past what the backend produced. */
		if (__predict_false(*cons + 1 == rp)) {
			err = EINVAL;
			break;
		}

		extra = (struct netif_extra_info *)
		RING_GET_RESPONSE(&rxq->ring, ++(*cons));

		if (__predict_false(!extra->type ||
			extra->type >= XEN_NETIF_EXTRA_TYPE_MAX)) {
			err = EINVAL;
		} else {
			memcpy(&extras[extra->type - 1], extra, sizeof(*extra));
		}

		/* The extra slot carried no data; recycle its buffer. */
		m = xn_get_rx_mbuf(rxq, *cons);
		ref = xn_get_rx_ref(rxq,  *cons);
		xn_move_rx_slot(rxq, m, ref);
	} while (extra->flags & XEN_NETIF_EXTRA_FLAG_MORE);

	return err;
}

/**
 * \brief Assemble one received packet from its ring responses.
 *
 * Starting at *cons, gathers the mbuf of the first response plus one
 * mbuf per following NETRXF_more_data fragment into a chain, ending the
 * foreign access on each fragment's grant reference.
 *
 * \param rxq    Receive queue being processed.
 * \param rinfo  Holds a copy of the first response and the extras array.
 * \param rp     Response producer index bounding the walk.
 * \param cons   In/out consumer index; advanced past consumed slots.
 * \param list   Out: head of the assembled chain (may be NULL).
 *
 * \return  0 on success; EINVAL for a malformed response or bad grant
 *          reference; ENOENT when more fragments were announced than
 *          the ring contains; or the error from xn_get_extras().
 */
static int
xn_get_responses(struct netfront_rxq *rxq,
    struct netfront_rx_info *rinfo, RING_IDX rp, RING_IDX *cons,
    struct mbuf  **list)
{
	struct netif_rx_response *rx = &rinfo->rx;
	struct netif_extra_info *extras = rinfo->extras;
	struct mbuf *m, *m0, *m_prev;
	grant_ref_t ref = xn_get_rx_ref(rxq, *cons);
	RING_IDX ref_cons = *cons;
	int frags = 1;
	int err = 0;
	u_long ret;

	m0 = m = m_prev = xn_get_rx_mbuf(rxq, *cons);

	if (rx->flags & NETRXF_extra_info) {
		err = xn_get_extras(rxq, extras, rp, cons);
	}

	if (m0 != NULL) {
		m0->m_pkthdr.len = 0;
		m0->m_next = NULL;
	}

	for (;;) {
#if 0
		DPRINTK("rx->status=%hd rx->offset=%hu frags=%u\n",
			rx->status, rx->offset, frags);
#endif
		if (__predict_false(rx->status < 0 ||
			rx->offset + rx->status > PAGE_SIZE)) {

			/* Bad fragment: recycle its buffer and drop it. */
			xn_move_rx_slot(rxq, m, ref);
			if (m0 == m)
				m0 = NULL;
			m = NULL;
			err = EINVAL;
			goto next_skip_queue;
		}

		/*
		 * This definitely indicates a bug, either in this driver or in
		 * the backend driver. In future this should flag the bad
		 * situation to the system controller to reboot the backend.
		 */
		if (ref == GRANT_REF_INVALID) {
			printf("%s: Bad rx response id %d.\n", __func__, rx->id);
			err = EINVAL;
			goto next;
		}

		ret = gnttab_end_foreign_access_ref(ref);
		KASSERT(ret, ("Unable to end access to grant references"));

		gnttab_release_grant_reference(&rxq->gref_head, ref);

next:
		if (m == NULL)
			break;

		/* In a valid response, status is the fragment's byte count. */
		m->m_len = rx->status;
		m->m_data += rx->offset;
		m0->m_pkthdr.len += rx->status;

next_skip_queue:
		if (!(rx->flags & NETRXF_more_data))
			break;

		if (*cons + frags == rp) {
			if (net_ratelimit())
				WPRINTK("Need more frags\n");
			err = ENOENT;
			printf("%s: cons %u frags %u rp %u, not enough frags\n",
			       __func__, *cons, frags, rp);
			break;
		}
		/*
		 * Note that m can be NULL, if rx->status < 0 or if
		 * rx->offset + rx->status > PAGE_SIZE above.
		 */
		m_prev = m;

		rx = RING_GET_RESPONSE(&rxq->ring, *cons + frags);
		m = xn_get_rx_mbuf(rxq, *cons + frags);

		/*
		 * m_prev == NULL can happen if rx->status < 0 or if
		 * rx->offset + rx->status > PAGE_SIZE above.
		 */
		if (m_prev != NULL)
			m_prev->m_next = m;

		/*
		 * m0 can be NULL if rx->status < 0 or if rx->offset +
		 * rx->status > PAGE_SIZE above.
		 */
		if (m0 == NULL)
			m0 = m;
		m->m_next = NULL;
		ref = xn_get_rx_ref(rxq, *cons + frags);
		ref_cons = *cons + frags;
		frags++;
	}
	*list = m0;
	*cons += frags;

	return (err);
}

/**
 * \brief Count the number of fragments in an mbuf chain.
 *
 * Surprisingly, there isn't an M* macro for this.
1591 */ 1592 static inline int 1593 xn_count_frags(struct mbuf *m) 1594 { 1595 int nfrags; 1596 1597 for (nfrags = 0; m != NULL; m = m->m_next) 1598 nfrags++; 1599 1600 return (nfrags); 1601 } 1602 1603 /** 1604 * Given an mbuf chain, make sure we have enough room and then push 1605 * it onto the transmit ring. 1606 */ 1607 static int 1608 xn_assemble_tx_request(struct netfront_txq *txq, struct mbuf *m_head) 1609 { 1610 struct mbuf *m; 1611 struct netfront_info *np = txq->info; 1612 struct ifnet *ifp = np->xn_ifp; 1613 u_int nfrags; 1614 int otherend_id; 1615 1616 /** 1617 * Defragment the mbuf if necessary. 1618 */ 1619 nfrags = xn_count_frags(m_head); 1620 1621 /* 1622 * Check to see whether this request is longer than netback 1623 * can handle, and try to defrag it. 1624 */ 1625 /** 1626 * It is a bit lame, but the netback driver in Linux can't 1627 * deal with nfrags > MAX_TX_REQ_FRAGS, which is a quirk of 1628 * the Linux network stack. 1629 */ 1630 if (nfrags > np->maxfrags) { 1631 m = m_defrag(m_head, M_NOWAIT); 1632 if (!m) { 1633 /* 1634 * Defrag failed, so free the mbuf and 1635 * therefore drop the packet. 1636 */ 1637 m_freem(m_head); 1638 return (EMSGSIZE); 1639 } 1640 m_head = m; 1641 } 1642 1643 /* Determine how many fragments now exist */ 1644 nfrags = xn_count_frags(m_head); 1645 1646 /* 1647 * Check to see whether the defragmented packet has too many 1648 * segments for the Linux netback driver. 1649 */ 1650 /** 1651 * The FreeBSD TCP stack, with TSO enabled, can produce a chain 1652 * of mbufs longer than Linux can handle. Make sure we don't 1653 * pass a too-long chain over to the other side by dropping the 1654 * packet. It doesn't look like there is currently a way to 1655 * tell the TCP stack to generate a shorter chain of packets. 
1656 */ 1657 if (nfrags > MAX_TX_REQ_FRAGS) { 1658 #ifdef DEBUG 1659 printf("%s: nfrags %d > MAX_TX_REQ_FRAGS %d, netback " 1660 "won't be able to handle it, dropping\n", 1661 __func__, nfrags, MAX_TX_REQ_FRAGS); 1662 #endif 1663 m_freem(m_head); 1664 return (EMSGSIZE); 1665 } 1666 1667 /* 1668 * This check should be redundant. We've already verified that we 1669 * have enough slots in the ring to handle a packet of maximum 1670 * size, and that our packet is less than the maximum size. Keep 1671 * it in here as an assert for now just to make certain that 1672 * chain_cnt is accurate. 1673 */ 1674 KASSERT((txq->mbufs_cnt + nfrags) <= NET_TX_RING_SIZE, 1675 ("%s: chain_cnt (%d) + nfrags (%d) > NET_TX_RING_SIZE " 1676 "(%d)!", __func__, (int) txq->mbufs_cnt, 1677 (int) nfrags, (int) NET_TX_RING_SIZE)); 1678 1679 /* 1680 * Start packing the mbufs in this chain into 1681 * the fragment pointers. Stop when we run out 1682 * of fragments or hit the end of the mbuf chain. 1683 */ 1684 m = m_head; 1685 otherend_id = xenbus_get_otherend_id(np->xbdev); 1686 for (m = m_head; m; m = m->m_next) { 1687 netif_tx_request_t *tx; 1688 uintptr_t id; 1689 grant_ref_t ref; 1690 u_long mfn; /* XXX Wrong type? 
*/ 1691 1692 tx = RING_GET_REQUEST(&txq->ring, txq->ring.req_prod_pvt); 1693 id = get_id_from_freelist(txq->mbufs); 1694 if (id == 0) 1695 panic("%s: was allocated the freelist head!\n", 1696 __func__); 1697 txq->mbufs_cnt++; 1698 if (txq->mbufs_cnt > NET_TX_RING_SIZE) 1699 panic("%s: tx_chain_cnt must be <= NET_TX_RING_SIZE\n", 1700 __func__); 1701 txq->mbufs[id] = m; 1702 tx->id = id; 1703 ref = gnttab_claim_grant_reference(&txq->gref_head); 1704 KASSERT((short)ref >= 0, ("Negative ref")); 1705 mfn = virt_to_mfn(mtod(m, vm_offset_t)); 1706 gnttab_grant_foreign_access_ref(ref, otherend_id, 1707 mfn, GNTMAP_readonly); 1708 tx->gref = txq->grant_ref[id] = ref; 1709 tx->offset = mtod(m, vm_offset_t) & (PAGE_SIZE - 1); 1710 tx->flags = 0; 1711 if (m == m_head) { 1712 /* 1713 * The first fragment has the entire packet 1714 * size, subsequent fragments have just the 1715 * fragment size. The backend works out the 1716 * true size of the first fragment by 1717 * subtracting the sizes of the other 1718 * fragments. 1719 */ 1720 tx->size = m->m_pkthdr.len; 1721 1722 /* 1723 * The first fragment contains the checksum flags 1724 * and is optionally followed by extra data for 1725 * TSO etc. 1726 */ 1727 /** 1728 * CSUM_TSO requires checksum offloading. 1729 * Some versions of FreeBSD fail to 1730 * set CSUM_TCP in the CSUM_TSO case, 1731 * so we have to test for CSUM_TSO 1732 * explicitly. 
1733 */ 1734 if (m->m_pkthdr.csum_flags 1735 & (CSUM_DELAY_DATA | CSUM_TSO)) { 1736 tx->flags |= (NETTXF_csum_blank 1737 | NETTXF_data_validated); 1738 } 1739 if (m->m_pkthdr.csum_flags & CSUM_TSO) { 1740 struct netif_extra_info *gso = 1741 (struct netif_extra_info *) 1742 RING_GET_REQUEST(&txq->ring, 1743 ++txq->ring.req_prod_pvt); 1744 1745 tx->flags |= NETTXF_extra_info; 1746 1747 gso->u.gso.size = m->m_pkthdr.tso_segsz; 1748 gso->u.gso.type = 1749 XEN_NETIF_GSO_TYPE_TCPV4; 1750 gso->u.gso.pad = 0; 1751 gso->u.gso.features = 0; 1752 1753 gso->type = XEN_NETIF_EXTRA_TYPE_GSO; 1754 gso->flags = 0; 1755 } 1756 } else { 1757 tx->size = m->m_len; 1758 } 1759 if (m->m_next) 1760 tx->flags |= NETTXF_more_data; 1761 1762 txq->ring.req_prod_pvt++; 1763 } 1764 BPF_MTAP(ifp, m_head); 1765 1766 xn_txeof(txq); 1767 1768 txq->stats.tx_bytes += m_head->m_pkthdr.len; 1769 txq->stats.tx_packets++; 1770 1771 return (0); 1772 } 1773 1774 /* equivalent of network_open() in Linux */ 1775 static void 1776 xn_ifinit_locked(struct netfront_info *np) 1777 { 1778 struct ifnet *ifp; 1779 int i; 1780 struct netfront_rxq *rxq; 1781 1782 XN_LOCK_ASSERT(np); 1783 1784 ifp = np->xn_ifp; 1785 1786 if (ifp->if_drv_flags & IFF_DRV_RUNNING) 1787 return; 1788 1789 xn_stop(np); 1790 1791 for (i = 0; i < np->num_queues; i++) { 1792 rxq = &np->rxq[i]; 1793 xn_alloc_rx_buffers(rxq); 1794 rxq->ring.sring->rsp_event = rxq->ring.rsp_cons + 1; 1795 } 1796 1797 ifp->if_drv_flags |= IFF_DRV_RUNNING; 1798 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; 1799 if_link_state_change(ifp, LINK_STATE_UP); 1800 } 1801 1802 static void 1803 xn_ifinit(void *xsc) 1804 { 1805 struct netfront_info *sc = xsc; 1806 1807 XN_LOCK(sc); 1808 xn_ifinit_locked(sc); 1809 XN_UNLOCK(sc); 1810 } 1811 1812 static int 1813 xn_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) 1814 { 1815 struct netfront_info *sc = ifp->if_softc; 1816 struct ifreq *ifr = (struct ifreq *) data; 1817 #ifdef INET 1818 struct ifaddr *ifa = (struct ifaddr *)data; 
1819 #endif 1820 1821 int mask, error = 0; 1822 switch(cmd) { 1823 case SIOCSIFADDR: 1824 #ifdef INET 1825 XN_LOCK(sc); 1826 if (ifa->ifa_addr->sa_family == AF_INET) { 1827 ifp->if_flags |= IFF_UP; 1828 if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) 1829 xn_ifinit_locked(sc); 1830 arp_ifinit(ifp, ifa); 1831 XN_UNLOCK(sc); 1832 } else { 1833 XN_UNLOCK(sc); 1834 #endif 1835 error = ether_ioctl(ifp, cmd, data); 1836 #ifdef INET 1837 } 1838 #endif 1839 break; 1840 case SIOCSIFMTU: 1841 ifp->if_mtu = ifr->ifr_mtu; 1842 ifp->if_drv_flags &= ~IFF_DRV_RUNNING; 1843 xn_ifinit(sc); 1844 break; 1845 case SIOCSIFFLAGS: 1846 XN_LOCK(sc); 1847 if (ifp->if_flags & IFF_UP) { 1848 /* 1849 * If only the state of the PROMISC flag changed, 1850 * then just use the 'set promisc mode' command 1851 * instead of reinitializing the entire NIC. Doing 1852 * a full re-init means reloading the firmware and 1853 * waiting for it to start up, which may take a 1854 * second or two. 1855 */ 1856 xn_ifinit_locked(sc); 1857 } else { 1858 if (ifp->if_drv_flags & IFF_DRV_RUNNING) { 1859 xn_stop(sc); 1860 } 1861 } 1862 sc->xn_if_flags = ifp->if_flags; 1863 XN_UNLOCK(sc); 1864 break; 1865 case SIOCSIFCAP: 1866 mask = ifr->ifr_reqcap ^ ifp->if_capenable; 1867 if (mask & IFCAP_TXCSUM) { 1868 if (IFCAP_TXCSUM & ifp->if_capenable) { 1869 ifp->if_capenable &= ~(IFCAP_TXCSUM|IFCAP_TSO4); 1870 ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP 1871 | CSUM_IP | CSUM_TSO); 1872 } else { 1873 ifp->if_capenable |= IFCAP_TXCSUM; 1874 ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP 1875 | CSUM_IP); 1876 } 1877 } 1878 if (mask & IFCAP_RXCSUM) { 1879 ifp->if_capenable ^= IFCAP_RXCSUM; 1880 } 1881 if (mask & IFCAP_TSO4) { 1882 if (IFCAP_TSO4 & ifp->if_capenable) { 1883 ifp->if_capenable &= ~IFCAP_TSO4; 1884 ifp->if_hwassist &= ~CSUM_TSO; 1885 } else if (IFCAP_TXCSUM & ifp->if_capenable) { 1886 ifp->if_capenable |= IFCAP_TSO4; 1887 ifp->if_hwassist |= CSUM_TSO; 1888 } else { 1889 IPRINTK("Xen requires tx checksum offload" 1890 " be 
enabled to use TSO\n"); 1891 error = EINVAL; 1892 } 1893 } 1894 if (mask & IFCAP_LRO) { 1895 ifp->if_capenable ^= IFCAP_LRO; 1896 1897 } 1898 break; 1899 case SIOCADDMULTI: 1900 case SIOCDELMULTI: 1901 break; 1902 case SIOCSIFMEDIA: 1903 case SIOCGIFMEDIA: 1904 error = ifmedia_ioctl(ifp, ifr, &sc->sc_media, cmd); 1905 break; 1906 default: 1907 error = ether_ioctl(ifp, cmd, data); 1908 } 1909 1910 return (error); 1911 } 1912 1913 static void 1914 xn_stop(struct netfront_info *sc) 1915 { 1916 struct ifnet *ifp; 1917 1918 XN_LOCK_ASSERT(sc); 1919 1920 ifp = sc->xn_ifp; 1921 1922 ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); 1923 if_link_state_change(ifp, LINK_STATE_DOWN); 1924 } 1925 1926 static void 1927 xn_rebuild_rx_bufs(struct netfront_rxq *rxq) 1928 { 1929 int requeue_idx, i; 1930 grant_ref_t ref; 1931 netif_rx_request_t *req; 1932 1933 for (requeue_idx = 0, i = 0; i < NET_RX_RING_SIZE; i++) { 1934 struct mbuf *m; 1935 u_long pfn; 1936 1937 if (rxq->mbufs[i] == NULL) 1938 continue; 1939 1940 m = rxq->mbufs[requeue_idx] = xn_get_rx_mbuf(rxq, i); 1941 ref = rxq->grant_ref[requeue_idx] = xn_get_rx_ref(rxq, i); 1942 1943 req = RING_GET_REQUEST(&rxq->ring, requeue_idx); 1944 pfn = vtophys(mtod(m, vm_offset_t)) >> PAGE_SHIFT; 1945 1946 gnttab_grant_foreign_access_ref(ref, 1947 xenbus_get_otherend_id(rxq->info->xbdev), 1948 pfn, 0); 1949 1950 req->gref = ref; 1951 req->id = requeue_idx; 1952 1953 requeue_idx++; 1954 } 1955 1956 rxq->ring.req_prod_pvt = requeue_idx; 1957 } 1958 1959 /* START of Xenolinux helper functions adapted to FreeBSD */ 1960 int 1961 xn_connect(struct netfront_info *np) 1962 { 1963 int i, error; 1964 u_int feature_rx_copy; 1965 struct netfront_rxq *rxq; 1966 struct netfront_txq *txq; 1967 1968 error = xs_scanf(XST_NIL, xenbus_get_otherend_path(np->xbdev), 1969 "feature-rx-copy", NULL, "%u", &feature_rx_copy); 1970 if (error != 0) 1971 feature_rx_copy = 0; 1972 1973 /* We only support rx copy. 
*/ 1974 if (!feature_rx_copy) 1975 return (EPROTONOSUPPORT); 1976 1977 /* Recovery procedure: */ 1978 error = talk_to_backend(np->xbdev, np); 1979 if (error != 0) 1980 return (error); 1981 1982 /* Step 1: Reinitialise variables. */ 1983 xn_query_features(np); 1984 xn_configure_features(np); 1985 1986 /* Step 2: Release TX buffer */ 1987 for (i = 0; i < np->num_queues; i++) { 1988 txq = &np->txq[i]; 1989 xn_release_tx_bufs(txq); 1990 } 1991 1992 /* Step 3: Rebuild the RX buffer freelist and the RX ring itself. */ 1993 for (i = 0; i < np->num_queues; i++) { 1994 rxq = &np->rxq[i]; 1995 xn_rebuild_rx_bufs(rxq); 1996 } 1997 1998 /* Step 4: All public and private state should now be sane. Get 1999 * ready to start sending and receiving packets and give the driver 2000 * domain a kick because we've probably just requeued some 2001 * packets. 2002 */ 2003 netfront_carrier_on(np); 2004 for (i = 0; i < np->num_queues; i++) { 2005 txq = &np->txq[i]; 2006 xen_intr_signal(txq->xen_intr_handle); 2007 XN_TX_LOCK(txq); 2008 xn_txeof(txq); 2009 XN_TX_UNLOCK(txq); 2010 xn_alloc_rx_buffers(rxq); 2011 } 2012 2013 return (0); 2014 } 2015 2016 static void 2017 xn_query_features(struct netfront_info *np) 2018 { 2019 int val; 2020 2021 device_printf(np->xbdev, "backend features:"); 2022 2023 if (xs_scanf(XST_NIL, xenbus_get_otherend_path(np->xbdev), 2024 "feature-sg", NULL, "%d", &val) < 0) 2025 val = 0; 2026 2027 np->maxfrags = 1; 2028 if (val) { 2029 np->maxfrags = MAX_TX_REQ_FRAGS; 2030 printf(" feature-sg"); 2031 } 2032 2033 if (xs_scanf(XST_NIL, xenbus_get_otherend_path(np->xbdev), 2034 "feature-gso-tcpv4", NULL, "%d", &val) < 0) 2035 val = 0; 2036 2037 np->xn_ifp->if_capabilities &= ~(IFCAP_TSO4|IFCAP_LRO); 2038 if (val) { 2039 np->xn_ifp->if_capabilities |= IFCAP_TSO4|IFCAP_LRO; 2040 printf(" feature-gso-tcp4"); 2041 } 2042 2043 printf("\n"); 2044 } 2045 2046 static int 2047 xn_configure_features(struct netfront_info *np) 2048 { 2049 int err, cap_enabled; 2050 #if (defined(INET) 
|| defined(INET6)) 2051 int i; 2052 #endif 2053 2054 err = 0; 2055 2056 if (np->xn_resume && 2057 ((np->xn_ifp->if_capenable & np->xn_ifp->if_capabilities) 2058 == np->xn_ifp->if_capenable)) { 2059 /* Current options are available, no need to do anything. */ 2060 return (0); 2061 } 2062 2063 /* Try to preserve as many options as possible. */ 2064 if (np->xn_resume) 2065 cap_enabled = np->xn_ifp->if_capenable; 2066 else 2067 cap_enabled = UINT_MAX; 2068 2069 #if (defined(INET) || defined(INET6)) 2070 for (i = 0; i < np->num_queues; i++) 2071 if ((np->xn_ifp->if_capenable & IFCAP_LRO) == 2072 (cap_enabled & IFCAP_LRO)) 2073 tcp_lro_free(&np->rxq[i].lro); 2074 #endif 2075 np->xn_ifp->if_capenable = 2076 np->xn_ifp->if_capabilities & ~(IFCAP_LRO|IFCAP_TSO4) & cap_enabled; 2077 np->xn_ifp->if_hwassist &= ~CSUM_TSO; 2078 #if (defined(INET) || defined(INET6)) 2079 for (i = 0; i < np->num_queues; i++) { 2080 if (xn_enable_lro && (np->xn_ifp->if_capabilities & IFCAP_LRO) == 2081 (cap_enabled & IFCAP_LRO)) { 2082 err = tcp_lro_init(&np->rxq[i].lro); 2083 if (err != 0) { 2084 device_printf(np->xbdev, "LRO initialization failed\n"); 2085 } else { 2086 np->rxq[i].lro.ifp = np->xn_ifp; 2087 np->xn_ifp->if_capenable |= IFCAP_LRO; 2088 } 2089 } 2090 } 2091 if ((np->xn_ifp->if_capabilities & IFCAP_TSO4) == 2092 (cap_enabled & IFCAP_TSO4)) { 2093 np->xn_ifp->if_capenable |= IFCAP_TSO4; 2094 np->xn_ifp->if_hwassist |= CSUM_TSO; 2095 } 2096 #endif 2097 return (err); 2098 } 2099 2100 static int 2101 xn_txq_mq_start_locked(struct netfront_txq *txq, struct mbuf *m) 2102 { 2103 struct netfront_info *np; 2104 struct ifnet *ifp; 2105 struct buf_ring *br; 2106 int error, notify; 2107 2108 np = txq->info; 2109 br = txq->br; 2110 ifp = np->xn_ifp; 2111 error = 0; 2112 2113 XN_TX_LOCK_ASSERT(txq); 2114 2115 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || 2116 !netfront_carrier_ok(np)) { 2117 if (m != NULL) 2118 error = drbr_enqueue(ifp, br, m); 2119 return (error); 2120 } 2121 2122 if (m != 
NULL) { 2123 error = drbr_enqueue(ifp, br, m); 2124 if (error != 0) 2125 return (error); 2126 } 2127 2128 while ((m = drbr_peek(ifp, br)) != NULL) { 2129 if (!xn_tx_slot_available(txq)) { 2130 drbr_putback(ifp, br, m); 2131 break; 2132 } 2133 2134 error = xn_assemble_tx_request(txq, m); 2135 /* xn_assemble_tx_request always consumes the mbuf*/ 2136 if (error != 0) { 2137 drbr_advance(ifp, br); 2138 break; 2139 } 2140 2141 RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&txq->ring, notify); 2142 if (notify) 2143 xen_intr_signal(txq->xen_intr_handle); 2144 2145 drbr_advance(ifp, br); 2146 } 2147 2148 if (RING_FULL(&txq->ring)) 2149 txq->full = true; 2150 2151 return (0); 2152 } 2153 2154 static int 2155 xn_txq_mq_start(struct ifnet *ifp, struct mbuf *m) 2156 { 2157 struct netfront_info *np; 2158 struct netfront_txq *txq; 2159 int i, npairs, error; 2160 2161 np = ifp->if_softc; 2162 npairs = np->num_queues; 2163 2164 /* check if flowid is set */ 2165 if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) 2166 i = m->m_pkthdr.flowid % npairs; 2167 else 2168 i = curcpu % npairs; 2169 2170 txq = &np->txq[i]; 2171 2172 if (XN_TX_TRYLOCK(txq) != 0) { 2173 error = xn_txq_mq_start_locked(txq, m); 2174 XN_TX_UNLOCK(txq); 2175 } else { 2176 error = drbr_enqueue(ifp, txq->br, m); 2177 taskqueue_enqueue(txq->tq, &txq->defrtask); 2178 } 2179 2180 return (error); 2181 } 2182 2183 static void 2184 xn_qflush(struct ifnet *ifp) 2185 { 2186 struct netfront_info *np; 2187 struct netfront_txq *txq; 2188 struct mbuf *m; 2189 int i; 2190 2191 np = ifp->if_softc; 2192 2193 for (i = 0; i < np->num_queues; i++) { 2194 txq = &np->txq[i]; 2195 2196 XN_TX_LOCK(txq); 2197 while ((m = buf_ring_dequeue_sc(txq->br)) != NULL) 2198 m_freem(m); 2199 XN_TX_UNLOCK(txq); 2200 } 2201 2202 if_qflush(ifp); 2203 } 2204 2205 /** 2206 * Create a network device. 2207 * @param dev Newbus device representing this virtual NIC. 
2208 */ 2209 int 2210 create_netdev(device_t dev) 2211 { 2212 struct netfront_info *np; 2213 int err; 2214 struct ifnet *ifp; 2215 2216 np = device_get_softc(dev); 2217 2218 np->xbdev = dev; 2219 2220 mtx_init(&np->sc_lock, "xnsc", "netfront softc lock", MTX_DEF); 2221 2222 ifmedia_init(&np->sc_media, 0, xn_ifmedia_upd, xn_ifmedia_sts); 2223 ifmedia_add(&np->sc_media, IFM_ETHER|IFM_MANUAL, 0, NULL); 2224 ifmedia_set(&np->sc_media, IFM_ETHER|IFM_MANUAL); 2225 2226 np->rx_min_target = RX_MIN_TARGET; 2227 np->rx_max_target = RX_MAX_TARGET; 2228 2229 err = xen_net_read_mac(dev, np->mac); 2230 if (err != 0) 2231 goto error; 2232 2233 /* Set up ifnet structure */ 2234 ifp = np->xn_ifp = if_alloc(IFT_ETHER); 2235 ifp->if_softc = np; 2236 if_initname(ifp, "xn", device_get_unit(dev)); 2237 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; 2238 ifp->if_ioctl = xn_ioctl; 2239 2240 ifp->if_transmit = xn_txq_mq_start; 2241 ifp->if_qflush = xn_qflush; 2242 2243 ifp->if_init = xn_ifinit; 2244 2245 ifp->if_hwassist = XN_CSUM_FEATURES; 2246 ifp->if_capabilities = IFCAP_HWCSUM; 2247 ifp->if_hw_tsomax = 65536 - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN); 2248 ifp->if_hw_tsomaxsegcount = MAX_TX_REQ_FRAGS; 2249 ifp->if_hw_tsomaxsegsize = PAGE_SIZE; 2250 2251 ether_ifattach(ifp, np->mac); 2252 netfront_carrier_off(np); 2253 2254 return (0); 2255 2256 error: 2257 KASSERT(err != 0, ("Error path with no error code specified")); 2258 return (err); 2259 } 2260 2261 static int 2262 netfront_detach(device_t dev) 2263 { 2264 struct netfront_info *info = device_get_softc(dev); 2265 2266 DPRINTK("%s\n", xenbus_get_node(dev)); 2267 2268 netif_free(info); 2269 2270 return 0; 2271 } 2272 2273 static void 2274 netif_free(struct netfront_info *np) 2275 { 2276 2277 XN_LOCK(np); 2278 xn_stop(np); 2279 XN_UNLOCK(np); 2280 netif_disconnect_backend(np); 2281 free(np->rxq, M_DEVBUF); 2282 free(np->txq, M_DEVBUF); 2283 ether_ifdetach(np->xn_ifp); 2284 if_free(np->xn_ifp); 2285 np->xn_ifp = NULL; 2286 
ifmedia_removeall(&np->sc_media); 2287 } 2288 2289 static void 2290 netif_disconnect_backend(struct netfront_info *np) 2291 { 2292 u_int i; 2293 2294 for (i = 0; i < np->num_queues; i++) { 2295 XN_RX_LOCK(&np->rxq[i]); 2296 XN_TX_LOCK(&np->txq[i]); 2297 } 2298 netfront_carrier_off(np); 2299 for (i = 0; i < np->num_queues; i++) { 2300 XN_RX_UNLOCK(&np->rxq[i]); 2301 XN_TX_UNLOCK(&np->txq[i]); 2302 } 2303 2304 for (i = 0; i < np->num_queues; i++) { 2305 disconnect_rxq(&np->rxq[i]); 2306 disconnect_txq(&np->txq[i]); 2307 } 2308 } 2309 2310 static int 2311 xn_ifmedia_upd(struct ifnet *ifp) 2312 { 2313 2314 return (0); 2315 } 2316 2317 static void 2318 xn_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr) 2319 { 2320 2321 ifmr->ifm_status = IFM_AVALID|IFM_ACTIVE; 2322 ifmr->ifm_active = IFM_ETHER|IFM_MANUAL; 2323 } 2324 2325 /* ** Driver registration ** */ 2326 static device_method_t netfront_methods[] = { 2327 /* Device interface */ 2328 DEVMETHOD(device_probe, netfront_probe), 2329 DEVMETHOD(device_attach, netfront_attach), 2330 DEVMETHOD(device_detach, netfront_detach), 2331 DEVMETHOD(device_shutdown, bus_generic_shutdown), 2332 DEVMETHOD(device_suspend, netfront_suspend), 2333 DEVMETHOD(device_resume, netfront_resume), 2334 2335 /* Xenbus interface */ 2336 DEVMETHOD(xenbus_otherend_changed, netfront_backend_changed), 2337 2338 DEVMETHOD_END 2339 }; 2340 2341 static driver_t netfront_driver = { 2342 "xn", 2343 netfront_methods, 2344 sizeof(struct netfront_info), 2345 }; 2346 devclass_t netfront_devclass; 2347 2348 DRIVER_MODULE(xe, xenbusb_front, netfront_driver, netfront_devclass, NULL, 2349 NULL); 2350