/*-
 * Copyright (c) 2004-2006 Kip Macy
 * Copyright (c) 2015 Wei Liu <wei.liu2@citrix.com>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_inet.h"
#include "opt_inet6.h"

#include <sys/param.h>
#include <sys/sockio.h>
#include <sys/limits.h>
#include <sys/mbuf.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/kernel.h>
#include <sys/socket.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>

#include <net/if.h>
#include <net/if_var.h>
#include <net/if_arp.h>
#include <net/ethernet.h>
#include <net/if_media.h>
#include <net/bpf.h>
#include <net/if_types.h>

#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/if_ether.h>
#include <netinet/tcp.h>
#include <netinet/tcp_lro.h>

#include <vm/vm.h>
#include <vm/pmap.h>

#include <sys/bus.h>

#include <xen/xen-os.h>
#include <xen/hypervisor.h>
#include <xen/xen_intr.h>
#include <xen/gnttab.h>
#include <xen/interface/memory.h>
#include <xen/interface/io/netif.h>
#include <xen/xenbus/xenbusvar.h>

#include "xenbus_if.h"

/* Features supported by all backends.  TSO and LRO can be negotiated. */
#define XN_CSUM_FEATURES	(CSUM_TCP | CSUM_UDP)

#define NET_TX_RING_SIZE	__RING_SIZE((netif_tx_sring_t *)0, PAGE_SIZE)
#define NET_RX_RING_SIZE	__RING_SIZE((netif_rx_sring_t *)0, PAGE_SIZE)

#define NET_RX_SLOTS_MIN	(XEN_NETIF_NR_SLOTS_MIN + 1)

/*
 * Should the driver do LRO on the RX end?  This can be toggled on the fly,
 * but the interface must be reset (down/up) for the change to take effect.
 */
static int xn_enable_lro = 1;
TUNABLE_INT("hw.xn.enable_lro", &xn_enable_lro);

/*
 * Number of pairs of queues.
 */
static unsigned long xn_num_queues = 4;
TUNABLE_ULONG("hw.xn.num_queues", &xn_num_queues);

/**
 * \brief The maximum allowed data fragments in a single transmit
 *        request.
 *
 * This limit is imposed by the backend driver.  We assume here that
 * we are dealing with a Linux driver domain and have set our limit
 * to mirror the Linux MAX_SKB_FRAGS constant.
 */
#define MAX_TX_REQ_FRAGS	(65536 / PAGE_SIZE + 2)
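/*
 * A worked example, assuming the common 4 KiB page size (an assumption,
 * not something imposed here): 65536 / 4096 + 2 = 18 fragments, the same
 * value Linux computes for MAX_SKB_FRAGS.  The two extra slots leave room
 * for a maximally sized 64 KiB payload whose first and last bytes do not
 * fall on page boundaries.
 */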
#define RX_COPY_THRESHOLD	256

#define net_ratelimit()	0

struct netfront_rxq;
struct netfront_txq;
struct netfront_info;
struct netfront_rx_info;

static void xn_txeof(struct netfront_txq *);
static void xn_rxeof(struct netfront_rxq *);
static void xn_alloc_rx_buffers(struct netfront_rxq *);
static void xn_alloc_rx_buffers_callout(void *arg);

static void xn_release_rx_bufs(struct netfront_rxq *);
static void xn_release_tx_bufs(struct netfront_txq *);

static void xn_rxq_intr(struct netfront_rxq *);
static void xn_txq_intr(struct netfront_txq *);
static void xn_intr(void *);
static inline int xn_count_frags(struct mbuf *m);
static int xn_assemble_tx_request(struct netfront_txq *, struct mbuf *);
static int xn_ioctl(struct ifnet *, u_long, caddr_t);
static void xn_ifinit_locked(struct netfront_info *);
static void xn_ifinit(void *);
static void xn_stop(struct netfront_info *);
static void xn_query_features(struct netfront_info *np);
static int xn_configure_features(struct netfront_info *np);
static void netif_free(struct netfront_info *info);
static int netfront_detach(device_t dev);

static int xn_txq_mq_start_locked(struct netfront_txq *, struct mbuf *);
static int xn_txq_mq_start(struct ifnet *, struct mbuf *);

static int talk_to_backend(device_t dev, struct netfront_info *info);
static int create_netdev(device_t dev);
static void netif_disconnect_backend(struct netfront_info *info);
static int setup_device(device_t dev, struct netfront_info *info,
    unsigned long);
static int xn_ifmedia_upd(struct ifnet *ifp);
static void xn_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr);

int xn_connect(struct netfront_info *);

static int xn_get_responses(struct netfront_rxq *,
    struct netfront_rx_info *, RING_IDX, RING_IDX *,
    struct mbuf **);

#define virt_to_mfn(x) (vtophys(x) >> PAGE_SHIFT)

#define INVALID_P2M_ENTRY (~0UL)

struct xn_rx_stats
{
	u_long	rx_packets;	/* total packets received */
	u_long	rx_bytes;	/* total bytes received */
	u_long	rx_errors;	/* bad packets received */
};

struct xn_tx_stats
{
	u_long	tx_packets;	/* total packets transmitted */
	u_long	tx_bytes;	/* total bytes transmitted */
	u_long	tx_errors;	/* packet transmit problems */
};

#define XN_QUEUE_NAME_LEN	8	/* xn{t,r}x_%u, allow for two digits */
struct netfront_rxq {
	struct netfront_info	*info;
	u_int			id;
	char			name[XN_QUEUE_NAME_LEN];
	struct mtx		lock;

	int			ring_ref;
	netif_rx_front_ring_t	ring;
	xen_intr_handle_t	xen_intr_handle;

	grant_ref_t		gref_head;
	grant_ref_t		grant_ref[NET_RX_RING_SIZE + 1];

	struct mbuf		*mbufs[NET_RX_RING_SIZE + 1];

	struct lro_ctrl		lro;

	struct callout		rx_refill;

	struct xn_rx_stats	stats;
};

struct netfront_txq {
	struct netfront_info	*info;
	u_int			id;
	char			name[XN_QUEUE_NAME_LEN];
	struct mtx		lock;

	int			ring_ref;
	netif_tx_front_ring_t	ring;
	xen_intr_handle_t	xen_intr_handle;

	grant_ref_t		gref_head;
	grant_ref_t		grant_ref[NET_TX_RING_SIZE + 1];

	struct mbuf		*mbufs[NET_TX_RING_SIZE + 1];
	int			mbufs_cnt;
	struct buf_ring		*br;

	struct taskqueue	*tq;
	struct task		defrtask;

	bool			full;

	struct xn_tx_stats	stats;
};
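/*
 * A brief sketch, for orientation, of how the txq->mbufs[] array above
 * doubles as a free list (see add_id_to_freelist() and
 * get_id_from_freelist() below): a free slot stores the index of the next
 * free slot cast to a fake mbuf pointer, with slot 0 acting as the list
 * head.  For a hypothetical four-entry ring, a fully free list looks like:
 *
 *	mbufs[0] = (struct mbuf *)1;	(head: first free id)
 *	mbufs[1] = (struct mbuf *)2;
 *	mbufs[2] = (struct mbuf *)3;
 *	mbufs[3] = (struct mbuf *)0;	(id 0 terminates the list)
 *
 * Any entry whose value is <= NET_TX_RING_SIZE is therefore a free-list
 * link rather than a real mbuf pointer; xn_release_tx_bufs() relies on
 * exactly this property.
 */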
struct netfront_info {
	struct ifnet		*xn_ifp;

	struct mtx		sc_lock;

	u_int			num_queues;
	struct netfront_rxq	*rxq;
	struct netfront_txq	*txq;

	u_int			carrier;
	u_int			maxfrags;

	device_t		xbdev;
	uint8_t			mac[ETHER_ADDR_LEN];

	int			xn_if_flags;

	struct ifmedia		sc_media;

	bool			xn_resume;
};

struct netfront_rx_info {
	struct netif_rx_response rx;
	struct netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1];
};

#define XN_RX_LOCK(_q)		mtx_lock(&(_q)->lock)
#define XN_RX_UNLOCK(_q)	mtx_unlock(&(_q)->lock)

#define XN_TX_LOCK(_q)		mtx_lock(&(_q)->lock)
#define XN_TX_TRYLOCK(_q)	mtx_trylock(&(_q)->lock)
#define XN_TX_UNLOCK(_q)	mtx_unlock(&(_q)->lock)

#define XN_LOCK(_sc)		mtx_lock(&(_sc)->sc_lock);
#define XN_UNLOCK(_sc)		mtx_unlock(&(_sc)->sc_lock);

#define XN_LOCK_ASSERT(_sc)	mtx_assert(&(_sc)->sc_lock, MA_OWNED);
#define XN_RX_LOCK_ASSERT(_q)	mtx_assert(&(_q)->lock, MA_OWNED);
#define XN_TX_LOCK_ASSERT(_q)	mtx_assert(&(_q)->lock, MA_OWNED);

#define netfront_carrier_on(netif)	((netif)->carrier = 1)
#define netfront_carrier_off(netif)	((netif)->carrier = 0)
#define netfront_carrier_ok(netif)	((netif)->carrier)

/* Access macros for acquiring/freeing slots in xn_free_{tx,rx}_idxs[]. */

static inline void
add_id_to_freelist(struct mbuf **list, uintptr_t id)
{

	KASSERT(id != 0,
	    ("%s: the head item (0) must always be free.", __func__));
	list[id] = list[0];
	list[0] = (struct mbuf *)id;
}

static inline unsigned short
get_id_from_freelist(struct mbuf **list)
{
	uintptr_t id;

	id = (uintptr_t)list[0];
	KASSERT(id != 0,
	    ("%s: the head item (0) must always remain free.", __func__));
	list[0] = list[id];
	return (id);
}

static inline int
xn_rxidx(RING_IDX idx)
{

	return idx & (NET_RX_RING_SIZE - 1);
}

static inline struct mbuf *
xn_get_rx_mbuf(struct netfront_rxq *rxq, RING_IDX ri)
{
	int i;
	struct mbuf *m;

	i = xn_rxidx(ri);
	m = rxq->mbufs[i];
	rxq->mbufs[i] = NULL;
	return (m);
}

static inline grant_ref_t
xn_get_rx_ref(struct netfront_rxq *rxq, RING_IDX ri)
{
	int i = xn_rxidx(ri);
	grant_ref_t ref = rxq->grant_ref[i];

	KASSERT(ref != GRANT_REF_INVALID, ("Invalid grant reference!\n"));
	rxq->grant_ref[i] = GRANT_REF_INVALID;
	return (ref);
}

#define IPRINTK(fmt, args...) \
    printf("[XEN] " fmt, ##args)
#ifdef INVARIANTS
#define WPRINTK(fmt, args...) \
    printf("[XEN] " fmt, ##args)
#else
#define WPRINTK(fmt, args...)
#endif
#ifdef DEBUG
#define DPRINTK(fmt, args...) \
    printf("[XEN] %s: " fmt, __func__, ##args)
#else
#define DPRINTK(fmt, args...)
#endif
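/*
 * For reference, a hypothetical example of the XenStore node consumed by
 * xen_net_read_mac() below (the value is a string of colon-separated hex
 * octets; the path and value shown here are illustrative only):
 *
 *	/local/domain/<domid>/device/vif/0/mac = "00:16:3e:5f:2a:01"
 *
 * The 00:16:3e prefix is the OUI conventionally used for Xen guest MAC
 * addresses.
 */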
/**
 * Read the 'mac' node at the given device's node in the store, and parse
 * that as colon-separated octets, placing the result in the given mac
 * array.  mac must be a preallocated array of length ETHER_ADDR_LEN (as
 * declared in net/ethernet.h).  Return 0 on success, or errno on error.
 */
static int
xen_net_read_mac(device_t dev, uint8_t mac[])
{
	int error, i;
	char *s, *e, *macstr;
	const char *path;

	path = xenbus_get_node(dev);
	error = xs_read(XST_NIL, path, "mac", NULL, (void **) &macstr);
	if (error == ENOENT) {
		/*
		 * Deal with missing mac XenStore nodes on devices with
		 * HVM emulation (the 'ioemu' configuration attribute)
		 * enabled.
		 *
		 * The HVM emulator may execute in a stub device model
		 * domain which lacks the permission, only given to Dom0,
		 * to update the guest's XenStore tree.  For this reason,
		 * the HVM emulator doesn't even attempt to write the
		 * front-side mac node, even when operating in Dom0.
		 * However, there should always be a mac listed in the
		 * backend tree.  Fall back to that location if our query
		 * of the front side XenStore location doesn't find
		 * anything.
		 */
		path = xenbus_get_otherend_path(dev);
		error = xs_read(XST_NIL, path, "mac", NULL,
		    (void **) &macstr);
	}
	if (error != 0) {
		xenbus_dev_fatal(dev, error, "parsing %s/mac", path);
		return (error);
	}

	s = macstr;
	for (i = 0; i < ETHER_ADDR_LEN; i++) {
		mac[i] = strtoul(s, &e, 16);
		if (s == e || (e[0] != ':' && e[0] != 0)) {
			free(macstr, M_XENBUS);
			return (ENOENT);
		}
		s = &e[1];
	}
	free(macstr, M_XENBUS);
	return (0);
}

/**
 * Entry point to this code when a new device is created.  Allocate the
 * basic structures and the ring buffers for communication with the
 * backend, and inform the backend of the appropriate details for those.
 * Switch to Connected state.
 */
static int
netfront_probe(device_t dev)
{

	if (xen_hvm_domain() && xen_disable_pv_nics != 0)
		return (ENXIO);

	if (!strcmp(xenbus_get_type(dev), "vif")) {
		device_set_desc(dev, "Virtual Network Interface");
		return (0);
	}

	return (ENXIO);
}

static int
netfront_attach(device_t dev)
{
	int err;

	err = create_netdev(dev);
	if (err != 0) {
		xenbus_dev_fatal(dev, err, "creating netdev");
		return (err);
	}

	SYSCTL_ADD_INT(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "enable_lro", CTLFLAG_RW,
	    &xn_enable_lro, 0, "Large Receive Offload");

	SYSCTL_ADD_ULONG(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "num_queues", CTLFLAG_RD,
	    &xn_num_queues, "Number of pairs of queues");

	return (0);
}

static int
netfront_suspend(device_t dev)
{
	struct netfront_info *np = device_get_softc(dev);
	u_int i;

	for (i = 0; i < np->num_queues; i++) {
		XN_RX_LOCK(&np->rxq[i]);
		XN_TX_LOCK(&np->txq[i]);
	}
	netfront_carrier_off(np);
	for (i = 0; i < np->num_queues; i++) {
		XN_RX_UNLOCK(&np->rxq[i]);
		XN_TX_UNLOCK(&np->txq[i]);
	}
	return (0);
}

/**
 * We are reconnecting to the backend, due to a suspend/resume, or a backend
 * driver restart.  We tear down our netif structure and recreate it, but
 * leave the device-layer structures intact so that this is transparent to
 * the rest of the kernel.
 */
static int
netfront_resume(device_t dev)
{
	struct netfront_info *info = device_get_softc(dev);

	info->xn_resume = true;
	netif_disconnect_backend(info);
	return (0);
}

static int
write_queue_xenstore_keys(device_t dev,
    struct netfront_rxq *rxq,
    struct netfront_txq *txq,
    struct xs_transaction *xst, bool hierarchy)
{
	int err;
	const char *message;
	const char *node = xenbus_get_node(dev);
	char *path;
	size_t path_size;

	KASSERT(rxq->id == txq->id, ("Mismatch between RX and TX queue ids"));
	/* Split event channel support is not yet there. */
	KASSERT(rxq->xen_intr_handle == txq->xen_intr_handle,
	    ("Split event channels are not supported"));

	if (hierarchy) {
		path_size = strlen(node) + 10;
		path = malloc(path_size, M_DEVBUF, M_WAITOK|M_ZERO);
		snprintf(path, path_size, "%s/queue-%u", node, rxq->id);
	} else {
		path_size = strlen(node) + 1;
		path = malloc(path_size, M_DEVBUF, M_WAITOK|M_ZERO);
		snprintf(path, path_size, "%s", node);
	}

	err = xs_printf(*xst, path, "tx-ring-ref", "%u", txq->ring_ref);
	if (err != 0) {
		message = "writing tx ring-ref";
		goto error;
	}
	err = xs_printf(*xst, path, "rx-ring-ref", "%u", rxq->ring_ref);
	if (err != 0) {
		message = "writing rx ring-ref";
		goto error;
	}
	err = xs_printf(*xst, path, "event-channel", "%u",
	    xen_intr_port(rxq->xen_intr_handle));
	if (err != 0) {
		message = "writing event-channel";
		goto error;
	}

	free(path, M_DEVBUF);

	return (0);

error:
	free(path, M_DEVBUF);
	xenbus_dev_fatal(dev, err, "%s", message);

	return (err);
}
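/*
 * For illustration, a sketch of the XenStore layout the helper above
 * produces; the ring reference and port numbers are hypothetical.  With a
 * single queue pair (hierarchy == false) the keys land directly under the
 * device node:
 *
 *	device/vif/0/tx-ring-ref = "8"
 *	device/vif/0/rx-ring-ref = "9"
 *	device/vif/0/event-channel = "17"
 *
 * With multiple queue pairs (hierarchy == true) each pair gets its own
 * "queue-%u" subdirectory, and talk_to_backend() additionally writes
 * "multi-queue-num-queues" at the top level:
 *
 *	device/vif/0/multi-queue-num-queues = "2"
 *	device/vif/0/queue-0/tx-ring-ref = "8"
 *	device/vif/0/queue-0/rx-ring-ref = "9"
 *	device/vif/0/queue-0/event-channel = "17"
 */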
/* Common code used when first setting up, and when resuming. */
static int
talk_to_backend(device_t dev, struct netfront_info *info)
{
	const char *message;
	struct xs_transaction xst;
	const char *node = xenbus_get_node(dev);
	int err;
	unsigned long num_queues, max_queues = 0;
	unsigned int i;

	err = xen_net_read_mac(dev, info->mac);
	if (err != 0) {
		xenbus_dev_fatal(dev, err, "parsing %s/mac", node);
		goto out;
	}

	err = xs_scanf(XST_NIL, xenbus_get_otherend_path(info->xbdev),
	    "multi-queue-max-queues", NULL, "%lu", &max_queues);
	if (err != 0)
		max_queues = 1;
	num_queues = xn_num_queues;
	if (num_queues > max_queues)
		num_queues = max_queues;

	err = setup_device(dev, info, num_queues);
	if (err != 0)
		goto out;

again:
	err = xs_transaction_start(&xst);
	if (err != 0) {
		xenbus_dev_fatal(dev, err, "starting transaction");
		goto free;
	}

	if (info->num_queues == 1) {
		err = write_queue_xenstore_keys(dev, &info->rxq[0],
		    &info->txq[0], &xst, false);
		if (err != 0)
			goto abort_transaction_no_def_error;
	} else {
		err = xs_printf(xst, node, "multi-queue-num-queues",
		    "%u", info->num_queues);
		if (err != 0) {
			message = "writing multi-queue-num-queues";
			goto abort_transaction;
		}

		for (i = 0; i < info->num_queues; i++) {
			err = write_queue_xenstore_keys(dev, &info->rxq[i],
			    &info->txq[i], &xst, true);
			if (err != 0)
				goto abort_transaction_no_def_error;
		}
	}

	err = xs_printf(xst, node, "request-rx-copy", "%u", 1);
	if (err != 0) {
		message = "writing request-rx-copy";
		goto abort_transaction;
	}
	err = xs_printf(xst, node, "feature-rx-notify", "%d", 1);
	if (err != 0) {
		message = "writing feature-rx-notify";
		goto abort_transaction;
	}
	err = xs_printf(xst, node, "feature-sg", "%d", 1);
	if (err != 0) {
		message = "writing feature-sg";
		goto abort_transaction;
	}
	err = xs_printf(xst, node, "feature-gso-tcpv4", "%d", 1);
	if (err != 0) {
		message = "writing feature-gso-tcpv4";
		goto abort_transaction;
	}

	err = xs_transaction_end(xst, 0);
	if (err != 0) {
		if (err == EAGAIN)
			goto again;
		xenbus_dev_fatal(dev, err, "completing transaction");
		goto free;
	}

	return (0);

abort_transaction:
	xenbus_dev_fatal(dev, err, "%s", message);
abort_transaction_no_def_error:
	xs_transaction_end(xst, 1);
free:
	netif_free(info);
out:
	return (err);
}

static void
xn_rxq_intr(struct netfront_rxq *rxq)
{

	XN_RX_LOCK(rxq);
	xn_rxeof(rxq);
	XN_RX_UNLOCK(rxq);
}

static void
xn_txq_start(struct netfront_txq *txq)
{
	struct netfront_info *np = txq->info;
	struct ifnet *ifp = np->xn_ifp;

	XN_TX_LOCK_ASSERT(txq);
	if (!drbr_empty(ifp, txq->br))
		xn_txq_mq_start_locked(txq, NULL);
}

static void
xn_txq_intr(struct netfront_txq *txq)
{

	XN_TX_LOCK(txq);
	if (RING_HAS_UNCONSUMED_RESPONSES(&txq->ring))
		xn_txeof(txq);
	xn_txq_start(txq);
	XN_TX_UNLOCK(txq);
}

static void
xn_txq_tq_deferred(void *xtxq, int pending)
{
	struct netfront_txq *txq = xtxq;

	XN_TX_LOCK(txq);
	xn_txq_start(txq);
	XN_TX_UNLOCK(txq);
}
static void
disconnect_rxq(struct netfront_rxq *rxq)
{

	xn_release_rx_bufs(rxq);
	gnttab_free_grant_references(rxq->gref_head);
	gnttab_end_foreign_access(rxq->ring_ref, NULL);
	/*
	 * No split event channel support at the moment; the handle will
	 * be unbound in the tx path, so there is no need to call
	 * xen_intr_unbind() here, but we do want to reset the handle to 0.
	 */
	rxq->xen_intr_handle = 0;
}

static void
destroy_rxq(struct netfront_rxq *rxq)
{

	callout_drain(&rxq->rx_refill);
	free(rxq->ring.sring, M_DEVBUF);
}

static void
destroy_rxqs(struct netfront_info *np)
{
	int i;

	for (i = 0; i < np->num_queues; i++)
		destroy_rxq(&np->rxq[i]);

	free(np->rxq, M_DEVBUF);
	np->rxq = NULL;
}

static int
setup_rxqs(device_t dev, struct netfront_info *info,
    unsigned long num_queues)
{
	int q, i;
	int error;
	netif_rx_sring_t *rxs;
	struct netfront_rxq *rxq;

	info->rxq = malloc(sizeof(struct netfront_rxq) * num_queues,
	    M_DEVBUF, M_WAITOK|M_ZERO);

	for (q = 0; q < num_queues; q++) {
		rxq = &info->rxq[q];

		rxq->id = q;
		rxq->info = info;
		rxq->ring_ref = GRANT_REF_INVALID;
		rxq->ring.sring = NULL;
		snprintf(rxq->name, XN_QUEUE_NAME_LEN, "xnrx_%u", q);
		mtx_init(&rxq->lock, rxq->name, "netfront receive lock",
		    MTX_DEF);

		for (i = 0; i <= NET_RX_RING_SIZE; i++) {
			rxq->mbufs[i] = NULL;
			rxq->grant_ref[i] = GRANT_REF_INVALID;
		}

		/* Start resources allocation */

		if (gnttab_alloc_grant_references(NET_RX_RING_SIZE,
		    &rxq->gref_head) != 0) {
			device_printf(dev, "allocating rx gref");
			error = ENOMEM;
			goto fail;
		}

		rxs = (netif_rx_sring_t *)malloc(PAGE_SIZE, M_DEVBUF,
		    M_WAITOK|M_ZERO);
		SHARED_RING_INIT(rxs);
		FRONT_RING_INIT(&rxq->ring, rxs, PAGE_SIZE);

		error = xenbus_grant_ring(dev, virt_to_mfn(rxs),
		    &rxq->ring_ref);
		if (error != 0) {
			device_printf(dev, "granting rx ring page");
			goto fail_grant_ring;
		}

		callout_init(&rxq->rx_refill, 1);
	}

	return (0);

fail_grant_ring:
	gnttab_free_grant_references(rxq->gref_head);
	free(rxq->ring.sring, M_DEVBUF);
fail:
	for (; q >= 0; q--) {
		disconnect_rxq(&info->rxq[q]);
		destroy_rxq(&info->rxq[q]);
	}

	free(info->rxq, M_DEVBUF);
	return (error);
}

static void
disconnect_txq(struct netfront_txq *txq)
{

	xn_release_tx_bufs(txq);
	gnttab_free_grant_references(txq->gref_head);
	gnttab_end_foreign_access(txq->ring_ref, NULL);
	xen_intr_unbind(&txq->xen_intr_handle);
}

static void
destroy_txq(struct netfront_txq *txq)
{

	free(txq->ring.sring, M_DEVBUF);
	buf_ring_free(txq->br, M_DEVBUF);
	taskqueue_drain_all(txq->tq);
	taskqueue_free(txq->tq);
}

static void
destroy_txqs(struct netfront_info *np)
{
	int i;

	for (i = 0; i < np->num_queues; i++)
		destroy_txq(&np->txq[i]);

	free(np->txq, M_DEVBUF);
	np->txq = NULL;
}

static int
setup_txqs(device_t dev, struct netfront_info *info,
    unsigned long num_queues)
{
	int q, i;
	int error;
	netif_tx_sring_t *txs;
	struct netfront_txq *txq;

	info->txq = malloc(sizeof(struct netfront_txq) * num_queues,
	    M_DEVBUF, M_WAITOK|M_ZERO);

	for (q = 0; q < num_queues; q++) {
		txq = &info->txq[q];

		txq->id = q;
		txq->info = info;

		txq->ring_ref = GRANT_REF_INVALID;
		txq->ring.sring = NULL;

		snprintf(txq->name, XN_QUEUE_NAME_LEN, "xntx_%u", q);

		mtx_init(&txq->lock, txq->name, "netfront transmit lock",
		    MTX_DEF);

		for (i = 0; i <= NET_TX_RING_SIZE; i++) {
			txq->mbufs[i] = (void *) ((u_long) i+1);
			txq->grant_ref[i] = GRANT_REF_INVALID;
		}
		txq->mbufs[NET_TX_RING_SIZE] = (void *)0;

		/* Start resources allocation. */

		if (gnttab_alloc_grant_references(NET_TX_RING_SIZE,
		    &txq->gref_head) != 0) {
			device_printf(dev,
			    "failed to allocate tx grant refs\n");
			error = ENOMEM;
			goto fail;
		}

		txs = (netif_tx_sring_t *)malloc(PAGE_SIZE, M_DEVBUF,
		    M_WAITOK|M_ZERO);
		SHARED_RING_INIT(txs);
		FRONT_RING_INIT(&txq->ring, txs, PAGE_SIZE);

		error = xenbus_grant_ring(dev, virt_to_mfn(txs),
		    &txq->ring_ref);
		if (error != 0) {
			device_printf(dev, "failed to grant tx ring\n");
			goto fail_grant_ring;
		}

		txq->br = buf_ring_alloc(NET_TX_RING_SIZE, M_DEVBUF,
		    M_WAITOK, &txq->lock);
		TASK_INIT(&txq->defrtask, 0, xn_txq_tq_deferred, txq);

		txq->tq = taskqueue_create(txq->name, M_WAITOK,
		    taskqueue_thread_enqueue, &txq->tq);

		error = taskqueue_start_threads(&txq->tq, 1, PI_NET,
		    "%s txq %d", device_get_nameunit(dev), txq->id);
		if (error != 0) {
			device_printf(dev, "failed to start tx taskq %d\n",
			    txq->id);
			goto fail_start_thread;
		}

		error = xen_intr_alloc_and_bind_local_port(dev,
		    xenbus_get_otherend_id(dev), /* filter */ NULL, xn_intr,
		    &info->txq[q], INTR_TYPE_NET | INTR_MPSAFE | INTR_ENTROPY,
		    &txq->xen_intr_handle);

		if (error != 0) {
			device_printf(dev,
			    "xen_intr_alloc_and_bind_local_port failed\n");
			goto fail_bind_port;
		}
	}

	return (0);

fail_bind_port:
	taskqueue_drain_all(txq->tq);
fail_start_thread:
	buf_ring_free(txq->br, M_DEVBUF);
	taskqueue_free(txq->tq);
	gnttab_end_foreign_access(txq->ring_ref, NULL);
fail_grant_ring:
	gnttab_free_grant_references(txq->gref_head);
	free(txq->ring.sring, M_DEVBUF);
fail:
	for (; q >= 0; q--) {
		disconnect_txq(&info->txq[q]);
		destroy_txq(&info->txq[q]);
	}

	free(info->txq, M_DEVBUF);
	return (error);
}

static int
setup_device(device_t dev, struct netfront_info *info,
    unsigned long num_queues)
{
	int error;
	int q;

	if (info->txq)
		destroy_txqs(info);

	if (info->rxq)
		destroy_rxqs(info);

	info->num_queues = 0;

	error = setup_rxqs(dev, info, num_queues);
	if (error != 0)
		goto out;
	error = setup_txqs(dev, info, num_queues);
	if (error != 0)
		goto out;

	info->num_queues = num_queues;

	/* No split event channel at the moment. */
	for (q = 0; q < num_queues; q++)
		info->rxq[q].xen_intr_handle = info->txq[q].xen_intr_handle;

	return (0);

out:
	KASSERT(error != 0,
	    ("Error path taken without providing an error code"));
	return (error);
}

#ifdef INET
/**
 * If this interface has an ipv4 address, send an arp for it.  This
 * helps to get the network going again after migrating hosts.
 */
static void
netfront_send_fake_arp(device_t dev, struct netfront_info *info)
{
	struct ifnet *ifp;
	struct ifaddr *ifa;

	ifp = info->xn_ifp;
	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
		if (ifa->ifa_addr->sa_family == AF_INET) {
			arp_ifinit(ifp, ifa);
		}
	}
}
#endif
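/*
 * For orientation, a rough sketch of the xenbus handshake the callback
 * below takes part in, assuming the usual case where the backend reaches
 * InitWait on its own:
 *
 *	backend -> InitWait:   frontend (still Initialising) runs
 *	                       xn_connect() to grant the rings and write
 *	                       the XenStore keys, then moves to Connected.
 *	backend -> Connected:  frontend sends a gratuitous ARP (INET only).
 *	backend -> Closing:    frontend answers by moving to Closed.
 */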
/**
 * Callback received when the backend's state changes.
 */
static void
netfront_backend_changed(device_t dev, XenbusState newstate)
{
	struct netfront_info *sc = device_get_softc(dev);

	DPRINTK("newstate=%d\n", newstate);

	switch (newstate) {
	case XenbusStateInitialising:
	case XenbusStateInitialised:
	case XenbusStateUnknown:
	case XenbusStateClosed:
	case XenbusStateReconfigured:
	case XenbusStateReconfiguring:
		break;
	case XenbusStateInitWait:
		if (xenbus_get_state(dev) != XenbusStateInitialising)
			break;
		if (xn_connect(sc) != 0)
			break;
		xenbus_set_state(dev, XenbusStateConnected);
		break;
	case XenbusStateClosing:
		xenbus_set_state(dev, XenbusStateClosed);
		break;
	case XenbusStateConnected:
#ifdef INET
		netfront_send_fake_arp(dev, sc);
#endif
		break;
	}
}

/**
 * \brief Verify that there is sufficient space in the Tx ring
 *        buffer for a maximally sized request to be enqueued.
 *
 * A transmit request requires a transmit descriptor for each packet
 * fragment, plus up to 2 entries for "options" (e.g. TSO).
 */
static inline int
xn_tx_slot_available(struct netfront_txq *txq)
{

	return (RING_FREE_REQUESTS(&txq->ring) > (MAX_TX_REQ_FRAGS + 2));
}

static void
xn_release_tx_bufs(struct netfront_txq *txq)
{
	int i;

	for (i = 1; i <= NET_TX_RING_SIZE; i++) {
		struct mbuf *m;

		m = txq->mbufs[i];

		/*
		 * We assume that no kernel addresses are
		 * less than NET_TX_RING_SIZE.  Any entry
		 * in the table that is below this number
		 * must be an index from free-list tracking.
		 */
		if (((uintptr_t)m) <= NET_TX_RING_SIZE)
			continue;
		gnttab_end_foreign_access_ref(txq->grant_ref[i]);
		gnttab_release_grant_reference(&txq->gref_head,
		    txq->grant_ref[i]);
		txq->grant_ref[i] = GRANT_REF_INVALID;
		add_id_to_freelist(txq->mbufs, i);
		txq->mbufs_cnt--;
		if (txq->mbufs_cnt < 0) {
			panic("%s: tx_chain_cnt must be >= 0", __func__);
		}
		m_free(m);
	}
}

static struct mbuf *
xn_alloc_one_rx_buffer(struct netfront_rxq *rxq)
{
	struct mbuf *m;

	m = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, MJUMPAGESIZE);
	if (m == NULL)
		return (NULL);
	m->m_len = m->m_pkthdr.len = MJUMPAGESIZE;

	return (m);
}

static void
xn_alloc_rx_buffers(struct netfront_rxq *rxq)
{
	RING_IDX req_prod;
	int notify;

	XN_RX_LOCK_ASSERT(rxq);

	if (__predict_false(rxq->info->carrier == 0))
		return;

	for (req_prod = rxq->ring.req_prod_pvt;
	     req_prod - rxq->ring.rsp_cons < NET_RX_RING_SIZE;
	     req_prod++) {
		struct mbuf *m;
		unsigned short id;
		grant_ref_t ref;
		struct netif_rx_request *req;
		unsigned long pfn;

		m = xn_alloc_one_rx_buffer(rxq);
		if (m == NULL)
			break;

		id = xn_rxidx(req_prod);

		KASSERT(rxq->mbufs[id] == NULL, ("non-NULL xn_rx_chain"));
		rxq->mbufs[id] = m;

		ref = gnttab_claim_grant_reference(&rxq->gref_head);
		KASSERT(ref != GNTTAB_LIST_END,
		    ("reserved grant references exhausted"));
		rxq->grant_ref[id] = ref;

		pfn = atop(vtophys(mtod(m, vm_offset_t)));
		req = RING_GET_REQUEST(&rxq->ring, req_prod);

		gnttab_grant_foreign_access_ref(ref,
		    xenbus_get_otherend_id(rxq->info->xbdev), pfn, 0);
		req->id = id;
		req->gref = ref;
	}

	rxq->ring.req_prod_pvt = req_prod;

	/* Not enough requests?  Try again later. */
	if (req_prod - rxq->ring.rsp_cons < NET_RX_SLOTS_MIN) {
		callout_reset(&rxq->rx_refill, hz/10,
		    xn_alloc_rx_buffers_callout, rxq);
		return;
	}

	wmb();		/* barrier so the backend sees the new requests */

	RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&rxq->ring, notify);
	if (notify)
		xen_intr_signal(rxq->xen_intr_handle);
}

static void
xn_alloc_rx_buffers_callout(void *arg)
{
	struct netfront_rxq *rxq;

	rxq = (struct netfront_rxq *)arg;
	XN_RX_LOCK(rxq);
	xn_alloc_rx_buffers(rxq);
	XN_RX_UNLOCK(rxq);
}

static void
xn_release_rx_bufs(struct netfront_rxq *rxq)
{
	int i, ref;
	struct mbuf *m;

	for (i = 0; i < NET_RX_RING_SIZE; i++) {
		m = rxq->mbufs[i];

		if (m == NULL)
			continue;

		ref = rxq->grant_ref[i];
		if (ref == GRANT_REF_INVALID)
			continue;

		gnttab_end_foreign_access_ref(ref);
		gnttab_release_grant_reference(&rxq->gref_head, ref);
		rxq->mbufs[i] = NULL;
		rxq->grant_ref[i] = GRANT_REF_INVALID;
		m_freem(m);
	}
}

static void
xn_rxeof(struct netfront_rxq *rxq)
{
	struct ifnet *ifp;
	struct netfront_info *np = rxq->info;
#if (defined(INET) || defined(INET6))
	struct lro_ctrl *lro = &rxq->lro;
#endif
	struct netfront_rx_info rinfo;
	struct netif_rx_response *rx = &rinfo.rx;
	struct netif_extra_info *extras = rinfo.extras;
	RING_IDX i, rp;
	struct mbuf *m;
	struct mbufq mbufq_rxq, mbufq_errq;
	int err, work_to_do;

	do {
		XN_RX_LOCK_ASSERT(rxq);
		if (!netfront_carrier_ok(np))
			return;

		/* XXX: there should be some sane limit. */
		mbufq_init(&mbufq_errq, INT_MAX);
		mbufq_init(&mbufq_rxq, INT_MAX);

		ifp = np->xn_ifp;

		rp = rxq->ring.sring->rsp_prod;
		rmb();	/* Ensure we see queued responses up to 'rp'. */

		i = rxq->ring.rsp_cons;
		while (i != rp) {
			memcpy(rx, RING_GET_RESPONSE(&rxq->ring, i),
			    sizeof(*rx));
			memset(extras, 0, sizeof(rinfo.extras));

			m = NULL;
			err = xn_get_responses(rxq, &rinfo, rp, &i, &m);

			if (__predict_false(err)) {
				if (m)
					(void)mbufq_enqueue(&mbufq_errq, m);
				rxq->stats.rx_errors++;
				continue;
			}

			m->m_pkthdr.rcvif = ifp;
			if (rx->flags & NETRXF_data_validated) {
				/*
				 * Tell the stack the checksums are okay.
				 * XXX this isn't necessarily the case;
				 * a check should be added.
				 */
				m->m_pkthdr.csum_flags |=
				    (CSUM_IP_CHECKED | CSUM_IP_VALID |
				    CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
				m->m_pkthdr.csum_data = 0xffff;
			}
			if ((rx->flags & NETRXF_extra_info) != 0 &&
			    (extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].type ==
			    XEN_NETIF_EXTRA_TYPE_GSO)) {
				m->m_pkthdr.tso_segsz =
				    extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].u.gso.size;
				m->m_pkthdr.csum_flags |= CSUM_TSO;
			}

			rxq->stats.rx_packets++;
			rxq->stats.rx_bytes += m->m_pkthdr.len;

			(void)mbufq_enqueue(&mbufq_rxq, m);
			rxq->ring.rsp_cons = i;
		}

		mbufq_drain(&mbufq_errq);

		/*
		 * Process all the mbufs after the remapping is complete.
		 * Break the mbuf chain first though.
		 */
		while ((m = mbufq_dequeue(&mbufq_rxq)) != NULL) {
			if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);

			/* XXX: Do we really need to drop the rx lock? */
			XN_RX_UNLOCK(rxq);
#if (defined(INET) || defined(INET6))
			/* Use LRO if possible */
			if ((ifp->if_capenable & IFCAP_LRO) == 0 ||
			    lro->lro_cnt == 0 || tcp_lro_rx(lro, m, 0)) {
				/*
				 * If LRO fails, pass up to the stack
				 * directly.
				 */
				(*ifp->if_input)(ifp, m);
			}
#else
			(*ifp->if_input)(ifp, m);
#endif

			XN_RX_LOCK(rxq);
		}

		rxq->ring.rsp_cons = i;

#if (defined(INET) || defined(INET6))
		/*
		 * Flush any outstanding LRO work
		 */
		tcp_lro_flush_all(lro);
#endif

		xn_alloc_rx_buffers(rxq);

		RING_FINAL_CHECK_FOR_RESPONSES(&rxq->ring, work_to_do);
	} while (work_to_do);
}

static void
xn_txeof(struct netfront_txq *txq)
{
	RING_IDX i, prod;
	unsigned short id;
	struct ifnet *ifp;
	netif_tx_response_t *txr;
	struct mbuf *m;
	struct netfront_info *np = txq->info;

	XN_TX_LOCK_ASSERT(txq);

	if (!netfront_carrier_ok(np))
		return;

	ifp = np->xn_ifp;

	do {
		prod = txq->ring.sring->rsp_prod;
		rmb();	/* Ensure we see responses up to 'prod'. */

		for (i = txq->ring.rsp_cons; i != prod; i++) {
			txr = RING_GET_RESPONSE(&txq->ring, i);
			if (txr->status == NETIF_RSP_NULL)
				continue;

			if (txr->status != NETIF_RSP_OKAY) {
				printf("%s: WARNING: response is %d!\n",
				    __func__, txr->status);
			}
			id = txr->id;
			m = txq->mbufs[id];
			KASSERT(m != NULL, ("mbuf not found in chain"));
			KASSERT((uintptr_t)m > NET_TX_RING_SIZE,
			    ("mbuf already on the free list, but we're "
			    "trying to free it again!"));
			M_ASSERTVALID(m);

			/*
			 * Increment packet count if this is the last
			 * mbuf of the chain.
			 */
			if (!m->m_next)
				if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
			if (__predict_false(gnttab_query_foreign_access(
			    txq->grant_ref[id]) != 0)) {
				panic("%s: grant id %u still in use by the "
				    "backend", __func__, id);
			}
			gnttab_end_foreign_access_ref(txq->grant_ref[id]);
			gnttab_release_grant_reference(
			    &txq->gref_head, txq->grant_ref[id]);
			txq->grant_ref[id] = GRANT_REF_INVALID;

			txq->mbufs[id] = NULL;
			add_id_to_freelist(txq->mbufs, id);
			txq->mbufs_cnt--;
			m_free(m);
			/*
			 * Only mark the txq active if we've freed up at
			 * least one slot to try.
			 */
			ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
		}
		txq->ring.rsp_cons = prod;

		/*
		 * Set a new event, then check for race with update of
		 * tx_cons.  Note that it is essential to schedule a
		 * callback, no matter how few buffers are pending.  Even if
		 * there is space in the transmit ring, higher layers may
		 * be blocked because too much data is outstanding: in such
		 * cases notification from Xen is likely to be the only kick
		 * that we'll get.
		 */
		txq->ring.sring->rsp_event =
		    prod + ((txq->ring.sring->req_prod - prod) >> 1) + 1;

		mb();
	} while (prod != txq->ring.sring->rsp_prod);

	if (txq->full &&
	    ((txq->ring.sring->req_prod - prod) < NET_TX_RING_SIZE)) {
		txq->full = false;
		xn_txq_start(txq);
	}
}

static void
xn_intr(void *xsc)
{
	struct netfront_txq *txq = xsc;
	struct netfront_info *np = txq->info;
	struct netfront_rxq *rxq = &np->rxq[txq->id];

	/* kick both tx and rx */
	xn_rxq_intr(rxq);
	xn_txq_intr(txq);
}

static void
xn_move_rx_slot(struct netfront_rxq *rxq, struct mbuf *m,
    grant_ref_t ref)
{
	int new = xn_rxidx(rxq->ring.req_prod_pvt);

	KASSERT(rxq->mbufs[new] == NULL, ("mbufs != NULL"));
	rxq->mbufs[new] = m;
	rxq->grant_ref[new] = ref;
	RING_GET_REQUEST(&rxq->ring, rxq->ring.req_prod_pvt)->id = new;
	RING_GET_REQUEST(&rxq->ring, rxq->ring.req_prod_pvt)->gref = ref;
	rxq->ring.req_prod_pvt++;
}

static int
xn_get_extras(struct netfront_rxq *rxq,
    struct netif_extra_info *extras, RING_IDX rp, RING_IDX *cons)
{
	struct netif_extra_info *extra;

	int err = 0;

	do {
		struct mbuf *m;
		grant_ref_t ref;

		if (__predict_false(*cons + 1 == rp)) {
			err = EINVAL;
			break;
		}

		extra = (struct netif_extra_info *)
		    RING_GET_RESPONSE(&rxq->ring, ++(*cons));

		if (__predict_false(!extra->type ||
		    extra->type >= XEN_NETIF_EXTRA_TYPE_MAX)) {
			err = EINVAL;
		} else {
			memcpy(&extras[extra->type - 1], extra,
			    sizeof(*extra));
		}

		m = xn_get_rx_mbuf(rxq, *cons);
		ref = xn_get_rx_ref(rxq, *cons);
		xn_move_rx_slot(rxq, m, ref);
	} while (extra->flags & XEN_NETIF_EXTRA_FLAG_MORE);

	return (err);
}

static int
xn_get_responses(struct netfront_rxq *rxq,
    struct netfront_rx_info *rinfo, RING_IDX rp, RING_IDX *cons,
    struct mbuf **list)
{
	struct netif_rx_response *rx = &rinfo->rx;
	struct netif_extra_info *extras = rinfo->extras;
	struct mbuf *m, *m0, *m_prev;
	grant_ref_t ref = xn_get_rx_ref(rxq, *cons);
	RING_IDX ref_cons = *cons;
	int frags = 1;
	int err = 0;
	u_long ret;

	m0 = m = m_prev = xn_get_rx_mbuf(rxq, *cons);

	if (rx->flags & NETRXF_extra_info) {
		err = xn_get_extras(rxq, extras, rp, cons);
	}

	if (m0 != NULL) {
		m0->m_pkthdr.len = 0;
		m0->m_next = NULL;
	}

	for (;;) {
#if 0
		DPRINTK("rx->status=%hd rx->offset=%hu frags=%u\n",
		    rx->status, rx->offset, frags);
#endif
		if (__predict_false(rx->status < 0 ||
		    rx->offset + rx->status > PAGE_SIZE)) {
			xn_move_rx_slot(rxq, m, ref);
			if (m0 == m)
				m0 = NULL;
			m = NULL;
			err = EINVAL;
			goto next_skip_queue;
		}

		/*
		 * This definitely indicates a bug, either in this driver or
		 * in the backend driver.  In future this should flag the bad
		 * situation to the system controller to reboot the backend.
		 */
1443 */ 1444 if (ref == GRANT_REF_INVALID) { 1445 printf("%s: Bad rx response id %d.\n", __func__, rx->id); 1446 err = EINVAL; 1447 goto next; 1448 } 1449 1450 ret = gnttab_end_foreign_access_ref(ref); 1451 KASSERT(ret, ("Unable to end access to grant references")); 1452 1453 gnttab_release_grant_reference(&rxq->gref_head, ref); 1454 1455 next: 1456 if (m == NULL) 1457 break; 1458 1459 m->m_len = rx->status; 1460 m->m_data += rx->offset; 1461 m0->m_pkthdr.len += rx->status; 1462 1463 next_skip_queue: 1464 if (!(rx->flags & NETRXF_more_data)) 1465 break; 1466 1467 if (*cons + frags == rp) { 1468 if (net_ratelimit()) 1469 WPRINTK("Need more frags\n"); 1470 err = ENOENT; 1471 printf("%s: cons %u frags %u rp %u, not enough frags\n", 1472 __func__, *cons, frags, rp); 1473 break; 1474 } 1475 /* 1476 * Note that m can be NULL, if rx->status < 0 or if 1477 * rx->offset + rx->status > PAGE_SIZE above. 1478 */ 1479 m_prev = m; 1480 1481 rx = RING_GET_RESPONSE(&rxq->ring, *cons + frags); 1482 m = xn_get_rx_mbuf(rxq, *cons + frags); 1483 1484 /* 1485 * m_prev == NULL can happen if rx->status < 0 or if 1486 * rx->offset + * rx->status > PAGE_SIZE above. 1487 */ 1488 if (m_prev != NULL) 1489 m_prev->m_next = m; 1490 1491 /* 1492 * m0 can be NULL if rx->status < 0 or if * rx->offset + 1493 * rx->status > PAGE_SIZE above. 1494 */ 1495 if (m0 == NULL) 1496 m0 = m; 1497 m->m_next = NULL; 1498 ref = xn_get_rx_ref(rxq, *cons + frags); 1499 ref_cons = *cons + frags; 1500 frags++; 1501 } 1502 *list = m0; 1503 *cons += frags; 1504 1505 return (err); 1506 } 1507 1508 /** 1509 * \brief Count the number of fragments in an mbuf chain. 1510 * 1511 * Surprisingly, there isn't an M* macro for this. 1512 */ 1513 static inline int 1514 xn_count_frags(struct mbuf *m) 1515 { 1516 int nfrags; 1517 1518 for (nfrags = 0; m != NULL; m = m->m_next) 1519 nfrags++; 1520 1521 return (nfrags); 1522 } 1523 1524 /** 1525 * Given an mbuf chain, make sure we have enough room and then push 1526 * it onto the transmit ring. 1527 */ 1528 static int 1529 xn_assemble_tx_request(struct netfront_txq *txq, struct mbuf *m_head) 1530 { 1531 struct mbuf *m; 1532 struct netfront_info *np = txq->info; 1533 struct ifnet *ifp = np->xn_ifp; 1534 u_int nfrags; 1535 int otherend_id; 1536 1537 /** 1538 * Defragment the mbuf if necessary. 1539 */ 1540 nfrags = xn_count_frags(m_head); 1541 1542 /* 1543 * Check to see whether this request is longer than netback 1544 * can handle, and try to defrag it. 1545 */ 1546 /** 1547 * It is a bit lame, but the netback driver in Linux can't 1548 * deal with nfrags > MAX_TX_REQ_FRAGS, which is a quirk of 1549 * the Linux network stack. 1550 */ 1551 if (nfrags > np->maxfrags) { 1552 m = m_defrag(m_head, M_NOWAIT); 1553 if (!m) { 1554 /* 1555 * Defrag failed, so free the mbuf and 1556 * therefore drop the packet. 1557 */ 1558 m_freem(m_head); 1559 return (EMSGSIZE); 1560 } 1561 m_head = m; 1562 } 1563 1564 /* Determine how many fragments now exist */ 1565 nfrags = xn_count_frags(m_head); 1566 1567 /* 1568 * Check to see whether the defragmented packet has too many 1569 * segments for the Linux netback driver. 1570 */ 1571 /** 1572 * The FreeBSD TCP stack, with TSO enabled, can produce a chain 1573 * of mbufs longer than Linux can handle. Make sure we don't 1574 * pass a too-long chain over to the other side by dropping the 1575 * packet. It doesn't look like there is currently a way to 1576 * tell the TCP stack to generate a shorter chain of packets. 
1577 */ 1578 if (nfrags > MAX_TX_REQ_FRAGS) { 1579 #ifdef DEBUG 1580 printf("%s: nfrags %d > MAX_TX_REQ_FRAGS %d, netback " 1581 "won't be able to handle it, dropping\n", 1582 __func__, nfrags, MAX_TX_REQ_FRAGS); 1583 #endif 1584 m_freem(m_head); 1585 return (EMSGSIZE); 1586 } 1587 1588 /* 1589 * This check should be redundant. We've already verified that we 1590 * have enough slots in the ring to handle a packet of maximum 1591 * size, and that our packet is less than the maximum size. Keep 1592 * it in here as an assert for now just to make certain that 1593 * chain_cnt is accurate. 1594 */ 1595 KASSERT((txq->mbufs_cnt + nfrags) <= NET_TX_RING_SIZE, 1596 ("%s: chain_cnt (%d) + nfrags (%d) > NET_TX_RING_SIZE " 1597 "(%d)!", __func__, (int) txq->mbufs_cnt, 1598 (int) nfrags, (int) NET_TX_RING_SIZE)); 1599 1600 /* 1601 * Start packing the mbufs in this chain into 1602 * the fragment pointers. Stop when we run out 1603 * of fragments or hit the end of the mbuf chain. 1604 */ 1605 m = m_head; 1606 otherend_id = xenbus_get_otherend_id(np->xbdev); 1607 for (m = m_head; m; m = m->m_next) { 1608 netif_tx_request_t *tx; 1609 uintptr_t id; 1610 grant_ref_t ref; 1611 u_long mfn; /* XXX Wrong type? */ 1612 1613 tx = RING_GET_REQUEST(&txq->ring, txq->ring.req_prod_pvt); 1614 id = get_id_from_freelist(txq->mbufs); 1615 if (id == 0) 1616 panic("%s: was allocated the freelist head!\n", 1617 __func__); 1618 txq->mbufs_cnt++; 1619 if (txq->mbufs_cnt > NET_TX_RING_SIZE) 1620 panic("%s: tx_chain_cnt must be <= NET_TX_RING_SIZE\n", 1621 __func__); 1622 txq->mbufs[id] = m; 1623 tx->id = id; 1624 ref = gnttab_claim_grant_reference(&txq->gref_head); 1625 KASSERT((short)ref >= 0, ("Negative ref")); 1626 mfn = virt_to_mfn(mtod(m, vm_offset_t)); 1627 gnttab_grant_foreign_access_ref(ref, otherend_id, 1628 mfn, GNTMAP_readonly); 1629 tx->gref = txq->grant_ref[id] = ref; 1630 tx->offset = mtod(m, vm_offset_t) & (PAGE_SIZE - 1); 1631 tx->flags = 0; 1632 if (m == m_head) { 1633 /* 1634 * The first fragment has the entire packet 1635 * size, subsequent fragments have just the 1636 * fragment size. The backend works out the 1637 * true size of the first fragment by 1638 * subtracting the sizes of the other 1639 * fragments. 1640 */ 1641 tx->size = m->m_pkthdr.len; 1642 1643 /* 1644 * The first fragment contains the checksum flags 1645 * and is optionally followed by extra data for 1646 * TSO etc. 1647 */ 1648 /** 1649 * CSUM_TSO requires checksum offloading. 1650 * Some versions of FreeBSD fail to 1651 * set CSUM_TCP in the CSUM_TSO case, 1652 * so we have to test for CSUM_TSO 1653 * explicitly. 
1654 */ 1655 if (m->m_pkthdr.csum_flags 1656 & (CSUM_DELAY_DATA | CSUM_TSO)) { 1657 tx->flags |= (NETTXF_csum_blank 1658 | NETTXF_data_validated); 1659 } 1660 if (m->m_pkthdr.csum_flags & CSUM_TSO) { 1661 struct netif_extra_info *gso = 1662 (struct netif_extra_info *) 1663 RING_GET_REQUEST(&txq->ring, 1664 ++txq->ring.req_prod_pvt); 1665 1666 tx->flags |= NETTXF_extra_info; 1667 1668 gso->u.gso.size = m->m_pkthdr.tso_segsz; 1669 gso->u.gso.type = 1670 XEN_NETIF_GSO_TYPE_TCPV4; 1671 gso->u.gso.pad = 0; 1672 gso->u.gso.features = 0; 1673 1674 gso->type = XEN_NETIF_EXTRA_TYPE_GSO; 1675 gso->flags = 0; 1676 } 1677 } else { 1678 tx->size = m->m_len; 1679 } 1680 if (m->m_next) 1681 tx->flags |= NETTXF_more_data; 1682 1683 txq->ring.req_prod_pvt++; 1684 } 1685 BPF_MTAP(ifp, m_head); 1686 1687 xn_txeof(txq); 1688 1689 txq->stats.tx_bytes += m_head->m_pkthdr.len; 1690 txq->stats.tx_packets++; 1691 1692 return (0); 1693 } 1694 1695 /* equivalent of network_open() in Linux */ 1696 static void 1697 xn_ifinit_locked(struct netfront_info *np) 1698 { 1699 struct ifnet *ifp; 1700 int i; 1701 struct netfront_rxq *rxq; 1702 1703 XN_LOCK_ASSERT(np); 1704 1705 ifp = np->xn_ifp; 1706 1707 if (ifp->if_drv_flags & IFF_DRV_RUNNING || !netfront_carrier_ok(np)) 1708 return; 1709 1710 xn_stop(np); 1711 1712 for (i = 0; i < np->num_queues; i++) { 1713 rxq = &np->rxq[i]; 1714 XN_RX_LOCK(rxq); 1715 xn_alloc_rx_buffers(rxq); 1716 rxq->ring.sring->rsp_event = rxq->ring.rsp_cons + 1; 1717 if (RING_HAS_UNCONSUMED_RESPONSES(&rxq->ring)) 1718 xn_rxeof(rxq); 1719 XN_RX_UNLOCK(rxq); 1720 } 1721 1722 ifp->if_drv_flags |= IFF_DRV_RUNNING; 1723 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; 1724 if_link_state_change(ifp, LINK_STATE_UP); 1725 } 1726 1727 static void 1728 xn_ifinit(void *xsc) 1729 { 1730 struct netfront_info *sc = xsc; 1731 1732 XN_LOCK(sc); 1733 xn_ifinit_locked(sc); 1734 XN_UNLOCK(sc); 1735 } 1736 1737 static int 1738 xn_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) 1739 { 1740 struct netfront_info *sc = ifp->if_softc; 1741 struct ifreq *ifr = (struct ifreq *) data; 1742 #ifdef INET 1743 struct ifaddr *ifa = (struct ifaddr *)data; 1744 #endif 1745 1746 int mask, error = 0; 1747 switch(cmd) { 1748 case SIOCSIFADDR: 1749 #ifdef INET 1750 XN_LOCK(sc); 1751 if (ifa->ifa_addr->sa_family == AF_INET) { 1752 ifp->if_flags |= IFF_UP; 1753 if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) 1754 xn_ifinit_locked(sc); 1755 arp_ifinit(ifp, ifa); 1756 XN_UNLOCK(sc); 1757 } else { 1758 XN_UNLOCK(sc); 1759 #endif 1760 error = ether_ioctl(ifp, cmd, data); 1761 #ifdef INET 1762 } 1763 #endif 1764 break; 1765 case SIOCSIFMTU: 1766 ifp->if_mtu = ifr->ifr_mtu; 1767 ifp->if_drv_flags &= ~IFF_DRV_RUNNING; 1768 xn_ifinit(sc); 1769 break; 1770 case SIOCSIFFLAGS: 1771 XN_LOCK(sc); 1772 if (ifp->if_flags & IFF_UP) { 1773 /* 1774 * If only the state of the PROMISC flag changed, 1775 * then just use the 'set promisc mode' command 1776 * instead of reinitializing the entire NIC. Doing 1777 * a full re-init means reloading the firmware and 1778 * waiting for it to start up, which may take a 1779 * second or two. 
1780 */ 1781 xn_ifinit_locked(sc); 1782 } else { 1783 if (ifp->if_drv_flags & IFF_DRV_RUNNING) { 1784 xn_stop(sc); 1785 } 1786 } 1787 sc->xn_if_flags = ifp->if_flags; 1788 XN_UNLOCK(sc); 1789 break; 1790 case SIOCSIFCAP: 1791 mask = ifr->ifr_reqcap ^ ifp->if_capenable; 1792 if (mask & IFCAP_TXCSUM) { 1793 if (IFCAP_TXCSUM & ifp->if_capenable) { 1794 ifp->if_capenable &= ~(IFCAP_TXCSUM|IFCAP_TSO4); 1795 ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP 1796 | CSUM_IP | CSUM_TSO); 1797 } else { 1798 ifp->if_capenable |= IFCAP_TXCSUM; 1799 ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP 1800 | CSUM_IP); 1801 } 1802 } 1803 if (mask & IFCAP_RXCSUM) { 1804 ifp->if_capenable ^= IFCAP_RXCSUM; 1805 } 1806 if (mask & IFCAP_TSO4) { 1807 if (IFCAP_TSO4 & ifp->if_capenable) { 1808 ifp->if_capenable &= ~IFCAP_TSO4; 1809 ifp->if_hwassist &= ~CSUM_TSO; 1810 } else if (IFCAP_TXCSUM & ifp->if_capenable) { 1811 ifp->if_capenable |= IFCAP_TSO4; 1812 ifp->if_hwassist |= CSUM_TSO; 1813 } else { 1814 IPRINTK("Xen requires tx checksum offload" 1815 " be enabled to use TSO\n"); 1816 error = EINVAL; 1817 } 1818 } 1819 if (mask & IFCAP_LRO) { 1820 ifp->if_capenable ^= IFCAP_LRO; 1821 1822 } 1823 break; 1824 case SIOCADDMULTI: 1825 case SIOCDELMULTI: 1826 break; 1827 case SIOCSIFMEDIA: 1828 case SIOCGIFMEDIA: 1829 error = ifmedia_ioctl(ifp, ifr, &sc->sc_media, cmd); 1830 break; 1831 default: 1832 error = ether_ioctl(ifp, cmd, data); 1833 } 1834 1835 return (error); 1836 } 1837 1838 static void 1839 xn_stop(struct netfront_info *sc) 1840 { 1841 struct ifnet *ifp; 1842 1843 XN_LOCK_ASSERT(sc); 1844 1845 ifp = sc->xn_ifp; 1846 1847 ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); 1848 if_link_state_change(ifp, LINK_STATE_DOWN); 1849 } 1850 1851 static void 1852 xn_rebuild_rx_bufs(struct netfront_rxq *rxq) 1853 { 1854 int requeue_idx, i; 1855 grant_ref_t ref; 1856 netif_rx_request_t *req; 1857 1858 for (requeue_idx = 0, i = 0; i < NET_RX_RING_SIZE; i++) { 1859 struct mbuf *m; 1860 u_long pfn; 1861 1862 if (rxq->mbufs[i] == NULL) 1863 continue; 1864 1865 m = rxq->mbufs[requeue_idx] = xn_get_rx_mbuf(rxq, i); 1866 ref = rxq->grant_ref[requeue_idx] = xn_get_rx_ref(rxq, i); 1867 1868 req = RING_GET_REQUEST(&rxq->ring, requeue_idx); 1869 pfn = vtophys(mtod(m, vm_offset_t)) >> PAGE_SHIFT; 1870 1871 gnttab_grant_foreign_access_ref(ref, 1872 xenbus_get_otherend_id(rxq->info->xbdev), 1873 pfn, 0); 1874 1875 req->gref = ref; 1876 req->id = requeue_idx; 1877 1878 requeue_idx++; 1879 } 1880 1881 rxq->ring.req_prod_pvt = requeue_idx; 1882 } 1883 1884 /* START of Xenolinux helper functions adapted to FreeBSD */ 1885 int 1886 xn_connect(struct netfront_info *np) 1887 { 1888 int i, error; 1889 u_int feature_rx_copy; 1890 struct netfront_rxq *rxq; 1891 struct netfront_txq *txq; 1892 1893 error = xs_scanf(XST_NIL, xenbus_get_otherend_path(np->xbdev), 1894 "feature-rx-copy", NULL, "%u", &feature_rx_copy); 1895 if (error != 0) 1896 feature_rx_copy = 0; 1897 1898 /* We only support rx copy. */ 1899 if (!feature_rx_copy) 1900 return (EPROTONOSUPPORT); 1901 1902 /* Recovery procedure: */ 1903 error = talk_to_backend(np->xbdev, np); 1904 if (error != 0) 1905 return (error); 1906 1907 /* Step 1: Reinitialise variables. */ 1908 xn_query_features(np); 1909 xn_configure_features(np); 1910 1911 /* Step 2: Release TX buffer */ 1912 for (i = 0; i < np->num_queues; i++) { 1913 txq = &np->txq[i]; 1914 xn_release_tx_bufs(txq); 1915 } 1916 1917 /* Step 3: Rebuild the RX buffer freelist and the RX ring itself. 
	for (i = 0; i < np->num_queues; i++) {
		rxq = &np->rxq[i];
		xn_rebuild_rx_bufs(rxq);
	}

	/*
	 * Step 4: All public and private state should now be sane.  Get
	 * ready to start sending and receiving packets and give the driver
	 * domain a kick because we've probably just requeued some
	 * packets.
	 */
	netfront_carrier_on(np);
	for (i = 0; i < np->num_queues; i++) {
		txq = &np->txq[i];
		rxq = &np->rxq[i];
		xen_intr_signal(txq->xen_intr_handle);
		XN_TX_LOCK(txq);
		xn_txeof(txq);
		XN_TX_UNLOCK(txq);
		XN_RX_LOCK(rxq);
		xn_alloc_rx_buffers(rxq);
		XN_RX_UNLOCK(rxq);
	}

	return (0);
}

static void
xn_query_features(struct netfront_info *np)
{
	int val;

	device_printf(np->xbdev, "backend features:");

	if (xs_scanf(XST_NIL, xenbus_get_otherend_path(np->xbdev),
	    "feature-sg", NULL, "%d", &val) != 0)
		val = 0;

	np->maxfrags = 1;
	if (val) {
		np->maxfrags = MAX_TX_REQ_FRAGS;
		printf(" feature-sg");
	}

	if (xs_scanf(XST_NIL, xenbus_get_otherend_path(np->xbdev),
	    "feature-gso-tcpv4", NULL, "%d", &val) != 0)
		val = 0;

	np->xn_ifp->if_capabilities &= ~(IFCAP_TSO4|IFCAP_LRO);
	if (val) {
		np->xn_ifp->if_capabilities |= IFCAP_TSO4|IFCAP_LRO;
		printf(" feature-gso-tcp4");
	}

	printf("\n");
}

static int
xn_configure_features(struct netfront_info *np)
{
	int err, cap_enabled;
#if (defined(INET) || defined(INET6))
	int i;
#endif

	err = 0;

	if (np->xn_resume &&
	    ((np->xn_ifp->if_capenable & np->xn_ifp->if_capabilities)
	    == np->xn_ifp->if_capenable)) {
		/* Current options are available, no need to do anything. */
		return (0);
	}

	/* Try to preserve as many options as possible. */
	if (np->xn_resume)
		cap_enabled = np->xn_ifp->if_capenable;
	else
		cap_enabled = UINT_MAX;

#if (defined(INET) || defined(INET6))
	for (i = 0; i < np->num_queues; i++)
		if ((np->xn_ifp->if_capenable & IFCAP_LRO) ==
		    (cap_enabled & IFCAP_LRO))
			tcp_lro_free(&np->rxq[i].lro);
#endif
	np->xn_ifp->if_capenable =
	    np->xn_ifp->if_capabilities & ~(IFCAP_LRO|IFCAP_TSO4) & cap_enabled;
	np->xn_ifp->if_hwassist &= ~CSUM_TSO;
#if (defined(INET) || defined(INET6))
	for (i = 0; i < np->num_queues; i++) {
		if (xn_enable_lro &&
		    (np->xn_ifp->if_capabilities & IFCAP_LRO) ==
		    (cap_enabled & IFCAP_LRO)) {
			err = tcp_lro_init(&np->rxq[i].lro);
			if (err != 0) {
				device_printf(np->xbdev,
				    "LRO initialization failed\n");
			} else {
				np->rxq[i].lro.ifp = np->xn_ifp;
				np->xn_ifp->if_capenable |= IFCAP_LRO;
			}
		}
	}
	if ((np->xn_ifp->if_capabilities & IFCAP_TSO4) ==
	    (cap_enabled & IFCAP_TSO4)) {
		np->xn_ifp->if_capenable |= IFCAP_TSO4;
		np->xn_ifp->if_hwassist |= CSUM_TSO;
	}
#endif
	return (err);
}

static int
xn_txq_mq_start_locked(struct netfront_txq *txq, struct mbuf *m)
{
	struct netfront_info *np;
	struct ifnet *ifp;
	struct buf_ring *br;
	int error, notify;

	np = txq->info;
	br = txq->br;
	ifp = np->xn_ifp;
	error = 0;

	XN_TX_LOCK_ASSERT(txq);

	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
	    !netfront_carrier_ok(np)) {
		if (m != NULL)
			error = drbr_enqueue(ifp, br, m);
		return (error);
	}

	if (m != NULL) {
		error = drbr_enqueue(ifp, br, m);
		if (error != 0)
			return (error);
	}

	while ((m = drbr_peek(ifp, br)) != NULL) {
		if (!xn_tx_slot_available(txq)) {
			drbr_putback(ifp, br, m);
			break;
		}

		error = xn_assemble_tx_request(txq, m);
		/* xn_assemble_tx_request always consumes the mbuf. */
		if (error != 0) {
			drbr_advance(ifp, br);
			break;
		}

		RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&txq->ring, notify);
		if (notify)
			xen_intr_signal(txq->xen_intr_handle);

		drbr_advance(ifp, br);
	}

	if (RING_FULL(&txq->ring))
		txq->full = true;

	return (0);
}

static int
xn_txq_mq_start(struct ifnet *ifp, struct mbuf *m)
{
	struct netfront_info *np;
	struct netfront_txq *txq;
	int i, npairs, error;

	np = ifp->if_softc;
	npairs = np->num_queues;

	KASSERT(npairs != 0, ("called with 0 available queues"));

	/* check if flowid is set */
	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
		i = m->m_pkthdr.flowid % npairs;
	else
		i = curcpu % npairs;

	txq = &np->txq[i];

	if (XN_TX_TRYLOCK(txq) != 0) {
		error = xn_txq_mq_start_locked(txq, m);
		XN_TX_UNLOCK(txq);
	} else {
		error = drbr_enqueue(ifp, txq->br, m);
		taskqueue_enqueue(txq->tq, &txq->defrtask);
	}

	return (error);
}

static void
xn_qflush(struct ifnet *ifp)
{
	struct netfront_info *np;
	struct netfront_txq *txq;
	struct mbuf *m;
	int i;

	np = ifp->if_softc;

	for (i = 0; i < np->num_queues; i++) {
		txq = &np->txq[i];

		XN_TX_LOCK(txq);
		while ((m = buf_ring_dequeue_sc(txq->br)) != NULL)
			m_freem(m);
		XN_TX_UNLOCK(txq);
	}

	if_qflush(ifp);
}
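/*
 * A small worked example of the queue selection in xn_txq_mq_start()
 * above, using hypothetical values: with npairs = 4 and an mbuf whose
 * flow id was set by the stack to m->m_pkthdr.flowid = 42, the packet is
 * steered to txq[42 % 4] = txq[2].  Packets without a flow id fall back
 * to curcpu % npairs, which keeps a given CPU on a single queue but
 * provides no per-flow stickiness.
 */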
/**
 * Create a network device.
 *
 * @param dev  Newbus device representing this virtual NIC.
 */
int
create_netdev(device_t dev)
{
	struct netfront_info *np;
	int err;
	struct ifnet *ifp;

	np = device_get_softc(dev);

	np->xbdev = dev;

	mtx_init(&np->sc_lock, "xnsc", "netfront softc lock", MTX_DEF);

	ifmedia_init(&np->sc_media, 0, xn_ifmedia_upd, xn_ifmedia_sts);
	ifmedia_add(&np->sc_media, IFM_ETHER|IFM_MANUAL, 0, NULL);
	ifmedia_set(&np->sc_media, IFM_ETHER|IFM_MANUAL);

	err = xen_net_read_mac(dev, np->mac);
	if (err != 0)
		goto error;

	/* Set up ifnet structure */
	ifp = np->xn_ifp = if_alloc(IFT_ETHER);
	ifp->if_softc = np;
	if_initname(ifp, "xn", device_get_unit(dev));
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
	ifp->if_ioctl = xn_ioctl;

	ifp->if_transmit = xn_txq_mq_start;
	ifp->if_qflush = xn_qflush;

	ifp->if_init = xn_ifinit;

	ifp->if_hwassist = XN_CSUM_FEATURES;
	ifp->if_capabilities = IFCAP_HWCSUM;
	ifp->if_hw_tsomax = 65536 - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN);
	ifp->if_hw_tsomaxsegcount = MAX_TX_REQ_FRAGS;
	ifp->if_hw_tsomaxsegsize = PAGE_SIZE;

	ether_ifattach(ifp, np->mac);
	netfront_carrier_off(np);

	return (0);

error:
	KASSERT(err != 0, ("Error path with no error code specified"));
	return (err);
}

static int
netfront_detach(device_t dev)
{
	struct netfront_info *info = device_get_softc(dev);

	DPRINTK("%s\n", xenbus_get_node(dev));

	netif_free(info);

	return (0);
}

static void
netif_free(struct netfront_info *np)
{

	XN_LOCK(np);
	xn_stop(np);
	XN_UNLOCK(np);
	netif_disconnect_backend(np);
	ether_ifdetach(np->xn_ifp);
	free(np->rxq, M_DEVBUF);
	free(np->txq, M_DEVBUF);
	if_free(np->xn_ifp);
	np->xn_ifp = NULL;
	ifmedia_removeall(&np->sc_media);
}

static void
netif_disconnect_backend(struct netfront_info *np)
{
	u_int i;

	for (i = 0; i < np->num_queues; i++) {
		XN_RX_LOCK(&np->rxq[i]);
		XN_TX_LOCK(&np->txq[i]);
	}
	netfront_carrier_off(np);
	for (i = 0; i < np->num_queues; i++) {
		XN_RX_UNLOCK(&np->rxq[i]);
		XN_TX_UNLOCK(&np->txq[i]);
	}

	for (i = 0; i < np->num_queues; i++) {
		disconnect_rxq(&np->rxq[i]);
		disconnect_txq(&np->txq[i]);
	}
}

static int
xn_ifmedia_upd(struct ifnet *ifp)
{

	return (0);
}

static void
xn_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr)
{

	ifmr->ifm_status = IFM_AVALID|IFM_ACTIVE;
	ifmr->ifm_active = IFM_ETHER|IFM_MANUAL;
}

/* ** Driver registration ** */
static device_method_t netfront_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		netfront_probe),
	DEVMETHOD(device_attach,	netfront_attach),
	DEVMETHOD(device_detach,	netfront_detach),
	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
	DEVMETHOD(device_suspend,	netfront_suspend),
	DEVMETHOD(device_resume,	netfront_resume),

	/* Xenbus interface */
	DEVMETHOD(xenbus_otherend_changed, netfront_backend_changed),

	DEVMETHOD_END
};

static driver_t netfront_driver = {
	"xn",
	netfront_methods,
	sizeof(struct netfront_info),
};
devclass_t netfront_devclass;
DRIVER_MODULE(xe, xenbusb_front, netfront_driver, netfront_devclass,
    NULL, NULL);
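/*
 * For reference, the hw.xn.* tunables declared near the top of this file
 * can be set from loader.conf(5); a minimal, hypothetical example:
 *
 *	hw.xn.enable_lro="0"
 *	hw.xn.num_queues="2"
 *
 * enable_lro is also exposed read/write under the device's sysctl tree
 * (roughly dev.xn.<unit>.enable_lro), while num_queues is read-only there.
 */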