/*
 *
 * Copyright (c) 2004-2006 Kip Macy
 * All rights reserved.
 *
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */


#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/sockio.h>
#include <sys/mbuf.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/kernel.h>
#include <sys/socket.h>
#include <sys/sysctl.h>
#include <sys/queue.h>
#include <sys/sx.h>

#include <net/if.h>
#include <net/if_arp.h>
#include <net/ethernet.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/bpf.h>

#include <net/if_types.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/if_ether.h>
#if __FreeBSD_version >= 700000
#include <netinet/tcp.h>
#include <netinet/tcp_lro.h>
#endif

#include <vm/vm.h>
#include <vm/pmap.h>

#include <machine/clock.h>	/* for DELAY */
#include <machine/bus.h>
#include <machine/resource.h>
#include <machine/frame.h>
#include <machine/vmparam.h>

#include <sys/bus.h>
#include <sys/rman.h>

#include <machine/intr_machdep.h>

#include <machine/xen/xen-os.h>
#include <machine/xen/xenfunc.h>
#include <xen/hypervisor.h>
#include <xen/xen_intr.h>
#include <xen/evtchn.h>
#include <xen/gnttab.h>
#include <xen/interface/memory.h>
#include <xen/interface/io/netif.h>
#include <xen/xenbus/xenbusvar.h>

#include <dev/xen/netfront/mbufq.h>

#include "xenbus_if.h"

#define XN_CSUM_FEATURES	(CSUM_TCP | CSUM_UDP | CSUM_TSO)

#define GRANT_INVALID_REF	0

#define NET_TX_RING_SIZE __RING_SIZE((netif_tx_sring_t *)0, PAGE_SIZE)
#define NET_RX_RING_SIZE __RING_SIZE((netif_rx_sring_t *)0, PAGE_SIZE)
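
/*
 * __RING_SIZE() reports how many request/response slots fit in a single
 * shared page once the ring header has been accounted for, rounded down
 * to a power of two.  With 4K pages and the standard netif request and
 * response layouts this typically works out to 256 slots per ring, which
 * is why the ring-index helpers below can mask with (size - 1).
 */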
#if __FreeBSD_version >= 700000
/*
 * Should the driver do LRO on the RX end?  This can be toggled on the fly,
 * but the interface must be reset (down/up) for it to take effect.
 */
static int xn_enable_lro = 1;
TUNABLE_INT("hw.xn.enable_lro", &xn_enable_lro);
#else

#define IFCAP_TSO4 0
#define CSUM_TSO 0

#endif

#ifdef CONFIG_XEN
static int MODPARM_rx_copy = 0;
module_param_named(rx_copy, MODPARM_rx_copy, bool, 0);
MODULE_PARM_DESC(rx_copy, "Copy packets from network card (rather than flip)");
static int MODPARM_rx_flip = 0;
module_param_named(rx_flip, MODPARM_rx_flip, bool, 0);
MODULE_PARM_DESC(rx_flip, "Flip packets from network card (rather than copy)");
#else
static const int MODPARM_rx_copy = 1;
static const int MODPARM_rx_flip = 0;
#endif

#define MAX_SKB_FRAGS	(65536/PAGE_SIZE + 2)
#define RX_COPY_THRESHOLD 256

#define net_ratelimit() 0

struct netfront_info;
struct netfront_rx_info;

static void xn_txeof(struct netfront_info *);
static void xn_rxeof(struct netfront_info *);
static void network_alloc_rx_buffers(struct netfront_info *);

static void xn_tick_locked(struct netfront_info *);
static void xn_tick(void *);

static void xn_intr(void *);
static void xn_start_locked(struct ifnet *);
static void xn_start(struct ifnet *);
static int  xn_ioctl(struct ifnet *, u_long, caddr_t);
static void xn_ifinit_locked(struct netfront_info *);
static void xn_ifinit(void *);
static void xn_stop(struct netfront_info *);
#ifdef notyet
static void xn_watchdog(struct ifnet *);
#endif

static void show_device(struct netfront_info *sc);
#ifdef notyet
static void netfront_closing(device_t dev);
#endif
static void netif_free(struct netfront_info *info);
static int netfront_detach(device_t dev);

static int talk_to_backend(device_t dev, struct netfront_info *info);
static int create_netdev(device_t dev);
static void netif_disconnect_backend(struct netfront_info *info);
static int setup_device(device_t dev, struct netfront_info *info);
static void end_access(int ref, void *page);

/* Xenolinux helper functions */
int network_connect(struct netfront_info *);

static void xn_free_rx_ring(struct netfront_info *);

static void xn_free_tx_ring(struct netfront_info *);

static int xennet_get_responses(struct netfront_info *np,
	struct netfront_rx_info *rinfo, RING_IDX rp, struct mbuf **list,
	int *pages_flipped_p);

#define virt_to_mfn(x) (vtomach(x) >> PAGE_SHIFT)

#define INVALID_P2M_ENTRY (~0UL)
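
/*
 * virt_to_mfn() translates a kernel virtual address to the machine frame
 * number backing it (via the physical-to-machine map), which is what the
 * hypervisor and the backend expect in grant and transfer operations.
 * INVALID_P2M_ENTRY is written into the p2m map for a pfn whose machine
 * page has been handed to the backend on the page-flipping receive path.
 */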
/*
 * Mbuf pointers.  We need these to keep track of the virtual addresses
 * of our mbuf chains since we can only convert from virtual to physical,
 * not the other way around.  The size must track the free index arrays.
 */
struct xn_chain_data {
	struct mbuf	*xn_tx_chain[NET_TX_RING_SIZE+1];
	struct mbuf	*xn_rx_chain[NET_RX_RING_SIZE+1];
};


struct net_device_stats
{
	u_long	rx_packets;		/* total packets received	*/
	u_long	tx_packets;		/* total packets transmitted	*/
	u_long	rx_bytes;		/* total bytes received		*/
	u_long	tx_bytes;		/* total bytes transmitted	*/
	u_long	rx_errors;		/* bad packets received		*/
	u_long	tx_errors;		/* packet transmit problems	*/
	u_long	rx_dropped;		/* no space in rx buffers	*/
	u_long	tx_dropped;		/* no space available for tx	*/
	u_long	multicast;		/* multicast packets received	*/
	u_long	collisions;

	/* detailed rx_errors: */
	u_long	rx_length_errors;
	u_long	rx_over_errors;		/* receiver ring buff overflow	*/
	u_long	rx_crc_errors;		/* received pkt with crc error	*/
	u_long	rx_frame_errors;	/* recv'd frame alignment error	*/
	u_long	rx_fifo_errors;		/* recv'r fifo overrun		*/
	u_long	rx_missed_errors;	/* receiver missed packet	*/

	/* detailed tx_errors */
	u_long	tx_aborted_errors;
	u_long	tx_carrier_errors;
	u_long	tx_fifo_errors;
	u_long	tx_heartbeat_errors;
	u_long	tx_window_errors;

	/* for cslip etc */
	u_long	rx_compressed;
	u_long	tx_compressed;
};

struct netfront_info {

	struct ifnet *xn_ifp;
#if __FreeBSD_version >= 700000
	struct lro_ctrl xn_lro;
#endif

	struct net_device_stats stats;
	u_int tx_full;

	netif_tx_front_ring_t tx;
	netif_rx_front_ring_t rx;

	struct mtx	tx_lock;
	struct mtx	rx_lock;
	struct sx	sc_lock;

	u_int handle;
	u_int irq;
	u_int copying_receiver;
	u_int carrier;

	/* Receive-ring batched refills. */
#define RX_MIN_TARGET 32
#define RX_MAX_TARGET NET_RX_RING_SIZE
	int rx_min_target, rx_max_target, rx_target;

	/*
	 * {tx,rx}_skbs store outstanding skbuffs.  The first entry in each
	 * array is an index into a chain of free entries.
	 */

	grant_ref_t gref_tx_head;
	grant_ref_t grant_tx_ref[NET_TX_RING_SIZE + 1];
	grant_ref_t gref_rx_head;
	grant_ref_t grant_rx_ref[NET_RX_RING_SIZE + 1];

#define TX_MAX_TARGET min(NET_RX_RING_SIZE, 256)
	device_t xbdev;
	int tx_ring_ref;
	int rx_ring_ref;
	uint8_t mac[ETHER_ADDR_LEN];
	struct xn_chain_data	xn_cdata;	/* mbufs */
	struct mbuf_head	xn_rx_batch;	/* head of the batch queue */

	int			xn_if_flags;
	struct callout		xn_stat_ch;

	u_long rx_pfn_array[NET_RX_RING_SIZE];
	multicall_entry_t rx_mcl[NET_RX_RING_SIZE+1];
	mmu_update_t rx_mmu[NET_RX_RING_SIZE];
};

#define rx_mbufs xn_cdata.xn_rx_chain
#define tx_mbufs xn_cdata.xn_tx_chain

#define XN_LOCK_INIT(_sc, _name) \
	mtx_init(&(_sc)->tx_lock, #_name"_tx", "network transmit lock", MTX_DEF); \
	mtx_init(&(_sc)->rx_lock, #_name"_rx", "network receive lock", MTX_DEF);  \
	sx_init(&(_sc)->sc_lock, #_name"_sc")

#define XN_RX_LOCK(_sc)		mtx_lock(&(_sc)->rx_lock)
#define XN_RX_UNLOCK(_sc)	mtx_unlock(&(_sc)->rx_lock)

#define XN_TX_LOCK(_sc)		mtx_lock(&(_sc)->tx_lock)
#define XN_TX_UNLOCK(_sc)	mtx_unlock(&(_sc)->tx_lock)

#define XN_LOCK(_sc)		sx_xlock(&(_sc)->sc_lock);
#define XN_UNLOCK(_sc)		sx_xunlock(&(_sc)->sc_lock);

#define XN_LOCK_ASSERT(_sc)	sx_assert(&(_sc)->sc_lock, SX_LOCKED);
#define XN_RX_LOCK_ASSERT(_sc)	mtx_assert(&(_sc)->rx_lock, MA_OWNED);
#define XN_TX_LOCK_ASSERT(_sc)	mtx_assert(&(_sc)->tx_lock, MA_OWNED);
#define XN_LOCK_DESTROY(_sc)	mtx_destroy(&(_sc)->rx_lock); \
				mtx_destroy(&(_sc)->tx_lock); \
				sx_destroy(&(_sc)->sc_lock);

struct netfront_rx_info {
	struct netif_rx_response rx;
	struct netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1];
};

#define netfront_carrier_on(netif)	((netif)->carrier = 1)
#define netfront_carrier_off(netif)	((netif)->carrier = 0)
#define netfront_carrier_ok(netif)	((netif)->carrier)

/* Access macros for acquiring/freeing slots in xn_free_{tx,rx}_idxs[]. */



/*
 * Access macros for acquiring/freeing slots in tx_skbs[].
 */

static inline void
add_id_to_freelist(struct mbuf **list, unsigned short id)
{
	list[id] = list[0];
	list[0]  = (void *)(u_long)id;
}

static inline unsigned short
get_id_from_freelist(struct mbuf **list)
{
	u_int id = (u_int)(u_long)list[0];
	list[0] = list[id];
	return (id);
}
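
/*
 * The free list is threaded through the mbuf pointer arrays themselves:
 * list[0] always holds the index of the first free slot (cast to a
 * pointer), and each free slot holds the index of the next free slot.
 * For example, starting from the chain 0 -> 3 -> 7, get_id_from_freelist()
 * returns 3 and leaves 0 -> 7, while add_id_to_freelist(list, 3) puts it
 * back, restoring 0 -> 3 -> 7.  Slot 0 is therefore never used for a real
 * mbuf, which is why the arrays above are sized RING_SIZE + 1.
 */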
static inline int
xennet_rxidx(RING_IDX idx)
{
	return idx & (NET_RX_RING_SIZE - 1);
}

static inline struct mbuf *
xennet_get_rx_mbuf(struct netfront_info *np, RING_IDX ri)
{
	int i = xennet_rxidx(ri);
	struct mbuf *m;

	m = np->rx_mbufs[i];
	np->rx_mbufs[i] = NULL;
	return (m);
}

static inline grant_ref_t
xennet_get_rx_ref(struct netfront_info *np, RING_IDX ri)
{
	int i = xennet_rxidx(ri);
	grant_ref_t ref = np->grant_rx_ref[i];
	np->grant_rx_ref[i] = GRANT_INVALID_REF;
	return ref;
}

#ifdef DEBUG

#endif
#define IPRINTK(fmt, args...) \
    printf("[XEN] " fmt, ##args)
#define WPRINTK(fmt, args...) \
    printf("[XEN] " fmt, ##args)
#if 0
#define DPRINTK(fmt, args...) \
    printf("[XEN] %s: " fmt, __func__, ##args)
#else
#define DPRINTK(fmt, args...)
#endif

/**
 * Read the 'mac' node at the given device's node in the store, and parse
 * that as colon-separated octets, placing the result in the given mac
 * array.  mac must be a preallocated array of length ETHER_ADDR_LEN.
 * Return 0 on success, or errno on error.
 */
static int
xen_net_read_mac(device_t dev, uint8_t mac[])
{
	int error, i;
	char *s, *e, *macstr;

	error = xenbus_read(XBT_NIL, xenbus_get_node(dev), "mac", NULL,
	    (void **) &macstr);
	if (error)
		return (error);

	s = macstr;
	for (i = 0; i < ETHER_ADDR_LEN; i++) {
		mac[i] = strtoul(s, &e, 16);
		if (s == e || (e[0] != ':' && e[0] != 0)) {
			free(macstr, M_DEVBUF);
			return (ENOENT);
		}
		s = &e[1];
	}
	free(macstr, M_DEVBUF);
	return (0);
}

/**
 * Entry point to this code when a new device is created.  Allocate the basic
 * structures and the ring buffers for communication with the backend, and
 * inform the backend of the appropriate details for those.  Switch to
 * Connected state.
 */
static int
netfront_probe(device_t dev)
{

	if (!strcmp(xenbus_get_type(dev), "vif")) {
		device_set_desc(dev, "Virtual Network Interface");
		return (0);
	}

	return (ENXIO);
}

static int
netfront_attach(device_t dev)
{
	int err;

	err = create_netdev(dev);
	if (err) {
		xenbus_dev_fatal(dev, err, "creating netdev");
		return err;
	}

#if __FreeBSD_version >= 700000
	SYSCTL_ADD_INT(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "enable_lro", CTLTYPE_INT|CTLFLAG_RW,
	    &xn_enable_lro, 0, "Large Receive Offload");
#endif

	return 0;
}


/**
 * We are reconnecting to the backend, due to a suspend/resume, or a backend
 * driver restart.  We tear down our netif structure and recreate it, but
 * leave the device-layer structures intact so that this is transparent to
 * the rest of the kernel.
 */
static int
netfront_resume(device_t dev)
{
	struct netfront_info *info = device_get_softc(dev);

	netif_disconnect_backend(info);
	return (0);
}
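
/*
 * The frontend/backend handshake goes through xenstore: after the shared
 * rings and event channel are set up, talk_to_backend() writes
 * tx-ring-ref, rx-ring-ref, event-channel, request-rx-copy and the
 * feature-* nodes in a single transaction, retrying on EAGAIN if the
 * transaction races with another xenstore update.
 */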
/* Common code used when first setting up, and when resuming. */
static int
talk_to_backend(device_t dev, struct netfront_info *info)
{
	const char *message;
	struct xenbus_transaction xbt;
	const char *node = xenbus_get_node(dev);
	int err;

	err = xen_net_read_mac(dev, info->mac);
	if (err) {
		xenbus_dev_fatal(dev, err, "parsing %s/mac", node);
		goto out;
	}

	/* Create shared ring, alloc event channel. */
	err = setup_device(dev, info);
	if (err)
		goto out;

again:
	err = xenbus_transaction_start(&xbt);
	if (err) {
		xenbus_dev_fatal(dev, err, "starting transaction");
		goto destroy_ring;
	}
	err = xenbus_printf(xbt, node, "tx-ring-ref", "%u",
	    info->tx_ring_ref);
	if (err) {
		message = "writing tx ring-ref";
		goto abort_transaction;
	}
	err = xenbus_printf(xbt, node, "rx-ring-ref", "%u",
	    info->rx_ring_ref);
	if (err) {
		message = "writing rx ring-ref";
		goto abort_transaction;
	}
	err = xenbus_printf(xbt, node,
	    "event-channel", "%u", irq_to_evtchn_port(info->irq));
	if (err) {
		message = "writing event-channel";
		goto abort_transaction;
	}
	err = xenbus_printf(xbt, node, "request-rx-copy", "%u",
	    info->copying_receiver);
	if (err) {
		message = "writing request-rx-copy";
		goto abort_transaction;
	}
	err = xenbus_printf(xbt, node, "feature-rx-notify", "%d", 1);
	if (err) {
		message = "writing feature-rx-notify";
		goto abort_transaction;
	}
	err = xenbus_printf(xbt, node, "feature-sg", "%d", 1);
	if (err) {
		message = "writing feature-sg";
		goto abort_transaction;
	}
#if __FreeBSD_version >= 700000
	err = xenbus_printf(xbt, node, "feature-gso-tcpv4", "%d", 1);
	if (err) {
		message = "writing feature-gso-tcpv4";
		goto abort_transaction;
	}
#endif

	err = xenbus_transaction_end(xbt, 0);
	if (err) {
		if (err == EAGAIN)
			goto again;
		xenbus_dev_fatal(dev, err, "completing transaction");
		goto destroy_ring;
	}

	return 0;

abort_transaction:
	xenbus_transaction_end(xbt, 1);
	xenbus_dev_fatal(dev, err, "%s", message);
destroy_ring:
	netif_free(info);
out:
	return err;
}


static int
setup_device(device_t dev, struct netfront_info *info)
{
	netif_tx_sring_t *txs;
	netif_rx_sring_t *rxs;
	int error;
	struct ifnet *ifp;

	ifp = info->xn_ifp;

	info->tx_ring_ref = GRANT_INVALID_REF;
	info->rx_ring_ref = GRANT_INVALID_REF;
	info->rx.sring = NULL;
	info->tx.sring = NULL;
	info->irq = 0;

	txs = (netif_tx_sring_t *)malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT|M_ZERO);
	if (!txs) {
		error = ENOMEM;
		xenbus_dev_fatal(dev, error, "allocating tx ring page");
		goto fail;
	}
	SHARED_RING_INIT(txs);
	FRONT_RING_INIT(&info->tx, txs, PAGE_SIZE);
	error = xenbus_grant_ring(dev, virt_to_mfn(txs), &info->tx_ring_ref);
	if (error)
		goto fail;

	rxs = (netif_rx_sring_t *)malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT|M_ZERO);
	if (!rxs) {
		error = ENOMEM;
		xenbus_dev_fatal(dev, error, "allocating rx ring page");
		goto fail;
	}
	SHARED_RING_INIT(rxs);
	FRONT_RING_INIT(&info->rx, rxs, PAGE_SIZE);

	error = xenbus_grant_ring(dev, virt_to_mfn(rxs), &info->rx_ring_ref);
	if (error)
		goto fail;

	error = bind_listening_port_to_irqhandler(xenbus_get_otherend_id(dev),
	    "xn", xn_intr, info, INTR_TYPE_NET | INTR_MPSAFE, &info->irq);

	if (error) {
		xenbus_dev_fatal(dev, error,
		    "bind_evtchn_to_irqhandler failed");
		goto fail;
	}

	show_device(info);

	return (0);

fail:
	netif_free(info);
	return (error);
}
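
/*
 * Note that each shared ring is a single page: SHARED_RING_INIT() and
 * FRONT_RING_INIT() lay out the producer/consumer indices and the
 * request/response slots inside that page, and the grant references
 * obtained above are what talk_to_backend() advertises to the backend
 * via xenstore.
 */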
/**
 * If this interface has an ipv4 address, send an arp for it.  This
 * helps to get the network going again after migrating hosts.
 */
static void
netfront_send_fake_arp(device_t dev, struct netfront_info *info)
{
	struct ifnet *ifp;
	struct ifaddr *ifa;

	ifp = info->xn_ifp;
	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
		if (ifa->ifa_addr->sa_family == AF_INET) {
			arp_ifinit(ifp, ifa);
		}
	}
}

/**
 * Callback received when the backend's state changes.
 */
static void
netfront_backend_changed(device_t dev, XenbusState newstate)
{
	struct netfront_info *sc = device_get_softc(dev);

	DPRINTK("newstate=%d\n", newstate);

	switch (newstate) {
	case XenbusStateInitialising:
	case XenbusStateInitialised:
	case XenbusStateConnected:
	case XenbusStateUnknown:
	case XenbusStateClosed:
	case XenbusStateReconfigured:
	case XenbusStateReconfiguring:
		break;
	case XenbusStateInitWait:
		if (xenbus_get_state(dev) != XenbusStateInitialising)
			break;
		if (network_connect(sc) != 0)
			break;
		xenbus_set_state(dev, XenbusStateConnected);
		netfront_send_fake_arp(dev, sc);
		break;
	case XenbusStateClosing:
		xenbus_set_state(dev, XenbusStateClosed);
		break;
	}
}

static void
xn_free_rx_ring(struct netfront_info *sc)
{
#if 0
	int i;

	for (i = 0; i < NET_RX_RING_SIZE; i++) {
		if (sc->xn_cdata.xn_rx_chain[i] != NULL) {
			m_freem(sc->xn_cdata.xn_rx_chain[i]);
			sc->xn_cdata.xn_rx_chain[i] = NULL;
		}
	}

	sc->rx.rsp_cons = 0;
	sc->xn_rx_if->req_prod = 0;
	sc->xn_rx_if->event = sc->rx.rsp_cons;
#endif
}

static void
xn_free_tx_ring(struct netfront_info *sc)
{
#if 0
	int i;

	for (i = 0; i < NET_TX_RING_SIZE; i++) {
		if (sc->xn_cdata.xn_tx_chain[i] != NULL) {
			m_freem(sc->xn_cdata.xn_tx_chain[i]);
			sc->xn_cdata.xn_tx_chain[i] = NULL;
		}
	}

	return;
#endif
}

static inline int
netfront_tx_slot_available(struct netfront_info *np)
{
	return ((np->tx.req_prod_pvt - np->tx.rsp_cons) <
	    (TX_MAX_TARGET - /* MAX_SKB_FRAGS */ 24 - 2));
}

static void
netif_release_tx_bufs(struct netfront_info *np)
{
	struct mbuf *m;
	int i;

	for (i = 1; i <= NET_TX_RING_SIZE; i++) {
		m = np->xn_cdata.xn_tx_chain[i];

		if (((u_long)m) < KERNBASE)
			continue;
		gnttab_grant_foreign_access_ref(np->grant_tx_ref[i],
		    xenbus_get_otherend_id(np->xbdev),
		    virt_to_mfn(mtod(m, vm_offset_t)),
		    GNTMAP_readonly);
		gnttab_release_grant_reference(&np->gref_tx_head,
		    np->grant_tx_ref[i]);
		np->grant_tx_ref[i] = GRANT_INVALID_REF;
		add_id_to_freelist(np->tx_mbufs, i);
		m_freem(m);
	}
}

static void
network_alloc_rx_buffers(struct netfront_info *sc)
{
	int otherend_id = xenbus_get_otherend_id(sc->xbdev);
	unsigned short id;
	struct mbuf *m_new;
	int i, batch_target, notify;
	RING_IDX req_prod;
	struct xen_memory_reservation reservation;
	grant_ref_t ref;
	int nr_flips;
	netif_rx_request_t *req;
	vm_offset_t vaddr;
	u_long pfn;

	req_prod = sc->rx.req_prod_pvt;

	if (unlikely(sc->carrier == 0))
		return;

	/*
	 * Allocate skbuffs greedily, even though we batch updates to the
	 * receive ring.  This creates a less bursty demand on the memory
	 * allocator, so should reduce the chance of failed allocation
	 * requests both for ourself and for other kernel subsystems.
	 */
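	/*
	 * batch_target is the number of new mbufs needed to bring the
	 * number of outstanding receive requests back up to rx_target;
	 * (req_prod - rsp_cons) requests are already posted but not yet
	 * consumed, so only the difference has to be allocated here.
	 */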
	batch_target = sc->rx_target - (req_prod - sc->rx.rsp_cons);
	for (i = mbufq_len(&sc->xn_rx_batch); i < batch_target; i++) {
		MGETHDR(m_new, M_DONTWAIT, MT_DATA);
		if (m_new == NULL)
			goto no_mbuf;

		m_cljget(m_new, M_DONTWAIT, MJUMPAGESIZE);
		if ((m_new->m_flags & M_EXT) == 0) {
			m_freem(m_new);

no_mbuf:
			if (i != 0)
				goto refill;
			/*
			 * XXX set timer
			 */
			break;
		}
		m_new->m_len = m_new->m_pkthdr.len = MJUMPAGESIZE;

		/* queue the mbufs allocated */
		mbufq_tail(&sc->xn_rx_batch, m_new);
	}

	/* Is the batch large enough to be worthwhile? */
	if (i < (sc->rx_target/2)) {
		if (req_prod > sc->rx.sring->req_prod)
			goto push;
		return;
	}
	/* Adjust floating fill target if we risked running out of buffers. */
	if (((req_prod - sc->rx.sring->rsp_prod) < (sc->rx_target / 4)) &&
	    ((sc->rx_target *= 2) > sc->rx_max_target))
		sc->rx_target = sc->rx_max_target;

refill:
	for (nr_flips = i = 0; ; i++) {
		if ((m_new = mbufq_dequeue(&sc->xn_rx_batch)) == NULL)
			break;

		m_new->m_ext.ext_arg1 = (vm_paddr_t *)(uintptr_t)(
		    vtophys(m_new->m_ext.ext_buf) >> PAGE_SHIFT);

		id = xennet_rxidx(req_prod + i);

		KASSERT(sc->xn_cdata.xn_rx_chain[id] == NULL,
		    ("non-NULL xn_rx_chain"));
		sc->xn_cdata.xn_rx_chain[id] = m_new;

		ref = gnttab_claim_grant_reference(&sc->gref_rx_head);
		KASSERT((short)ref >= 0, ("negative ref"));
		sc->grant_rx_ref[id] = ref;

		vaddr = mtod(m_new, vm_offset_t);
		pfn = vtophys(vaddr) >> PAGE_SHIFT;
		req = RING_GET_REQUEST(&sc->rx, req_prod + i);

		if (sc->copying_receiver == 0) {
			gnttab_grant_foreign_transfer_ref(ref,
			    otherend_id, pfn);
			sc->rx_pfn_array[nr_flips] = PFNTOMFN(pfn);
			if (!xen_feature(XENFEAT_auto_translated_physmap)) {
				/*
				 * Remove this page before passing
				 * back to Xen.
				 */
				set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
				MULTI_update_va_mapping(&sc->rx_mcl[i],
				    vaddr, 0, 0);
			}
			nr_flips++;
		} else {
			gnttab_grant_foreign_access_ref(ref,
			    otherend_id,
			    PFNTOMFN(pfn), 0);
		}
		req->id = id;
		req->gref = ref;

		sc->rx_pfn_array[i] =
		    vtomach(mtod(m_new, vm_offset_t)) >> PAGE_SHIFT;
	}

	KASSERT(i, ("no mbufs processed"));	/* should have returned earlier */
	KASSERT(mbufq_len(&sc->xn_rx_batch) == 0, ("not all mbufs processed"));
	/*
	 * We may have allocated buffers which have entries outstanding
	 * in the page update queue -- make sure we flush those first!
	 */
	PT_UPDATES_FLUSH();
	if (nr_flips != 0) {
#ifdef notyet
		/* Tell the balloon driver what is going on. */
		balloon_update_driver_allowance(i);
#endif
		set_xen_guest_handle(reservation.extent_start, sc->rx_pfn_array);
		reservation.nr_extents   = i;
		reservation.extent_order = 0;
		reservation.address_bits = 0;
		reservation.domid        = DOMID_SELF;

		if (!xen_feature(XENFEAT_auto_translated_physmap)) {

			/* After all PTEs have been zapped, flush the TLB. */
			sc->rx_mcl[i-1].args[MULTI_UVMFLAGS_INDEX] =
			    UVMF_TLB_FLUSH|UVMF_ALL;

			/* Give away a batch of pages. */
			sc->rx_mcl[i].op = __HYPERVISOR_memory_op;
			sc->rx_mcl[i].args[0] = XENMEM_decrease_reservation;
			sc->rx_mcl[i].args[1] = (u_long)&reservation;
			/* Zap PTEs and give away pages in one big multicall. */
			(void)HYPERVISOR_multicall(sc->rx_mcl, i+1);

			/* Check return status of HYPERVISOR_dom_mem_op(). */
			if (unlikely(sc->rx_mcl[i].result != i))
				panic("Unable to reduce memory reservation\n");
		} else {
			if (HYPERVISOR_memory_op(
			    XENMEM_decrease_reservation, &reservation)
			    != i)
				panic("Unable to reduce memory "
				    "reservation\n");
		}
	} else {
		wmb();
	}

	/* Above is a suitable barrier to ensure backend will see requests. */
	sc->rx.req_prod_pvt = req_prod + i;
push:
	RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&sc->rx, notify);
	if (notify)
		notify_remote_via_irq(sc->irq);
}

static void
xn_rxeof(struct netfront_info *np)
{
	struct ifnet *ifp;
#if __FreeBSD_version >= 700000
	struct lro_ctrl *lro = &np->xn_lro;
	struct lro_entry *queued;
#endif
	struct netfront_rx_info rinfo;
	struct netif_rx_response *rx = &rinfo.rx;
	struct netif_extra_info *extras = rinfo.extras;
	RING_IDX i, rp;
	multicall_entry_t *mcl;
	struct mbuf *m;
	struct mbuf_head rxq, errq;
	int err, pages_flipped = 0, work_to_do;

	do {
		XN_RX_LOCK_ASSERT(np);
		if (!netfront_carrier_ok(np))
			return;

		mbufq_init(&errq);
		mbufq_init(&rxq);

		ifp = np->xn_ifp;

		rp = np->rx.sring->rsp_prod;
		rmb();	/* Ensure we see queued responses up to 'rp'. */

		i = np->rx.rsp_cons;
		while (i != rp) {
			memcpy(rx, RING_GET_RESPONSE(&np->rx, i), sizeof(*rx));
			memset(extras, 0, sizeof(rinfo.extras));

			m = NULL;
			err = xennet_get_responses(np, &rinfo, rp, &m,
			    &pages_flipped);

			if (unlikely(err)) {
				if (m)
					mbufq_tail(&errq, m);
				np->stats.rx_errors++;
				i = np->rx.rsp_cons;
				continue;
			}

			m->m_pkthdr.rcvif = ifp;
			if (rx->flags & NETRXF_data_validated) {
				/* Tell the stack the checksums are okay */
				/*
				 * XXX this isn't necessarily the case - need
				 * to add check
				 */

				m->m_pkthdr.csum_flags |=
				    (CSUM_IP_CHECKED | CSUM_IP_VALID |
				    CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
				m->m_pkthdr.csum_data = 0xffff;
			}

			np->stats.rx_packets++;
			np->stats.rx_bytes += m->m_pkthdr.len;

			mbufq_tail(&rxq, m);
			np->rx.rsp_cons = ++i;
		}

		if (pages_flipped) {
			/* Some pages are no longer absent... */
#ifdef notyet
			balloon_update_driver_allowance(-pages_flipped);
#endif
			/*
			 * Do all the remapping work and M->P updates in one
			 * big hypercall.
			 */
			if (!xen_feature(XENFEAT_auto_translated_physmap)) {
				mcl = np->rx_mcl + pages_flipped;
				mcl->op = __HYPERVISOR_mmu_update;
				mcl->args[0] = (u_long)np->rx_mmu;
				mcl->args[1] = pages_flipped;
				mcl->args[2] = 0;
				mcl->args[3] = DOMID_SELF;
				(void)HYPERVISOR_multicall(np->rx_mcl,
				    pages_flipped + 1);
			}
		}

		while ((m = mbufq_dequeue(&errq)))
			m_freem(m);

		/*
		 * Process all the mbufs after the remapping is complete.
		 * Break the mbuf chain first though.
		 */
		while ((m = mbufq_dequeue(&rxq)) != NULL) {
			ifp->if_ipackets++;

			/*
			 * Do we really need to drop the rx lock?
			 */
			XN_RX_UNLOCK(np);
#if __FreeBSD_version >= 700000
			/* Use LRO if possible */
			if ((ifp->if_capenable & IFCAP_LRO) == 0 ||
			    lro->lro_cnt == 0 || tcp_lro_rx(lro, m, 0)) {
				/*
				 * If LRO fails, pass up to the stack
				 * directly.
				 */
				(*ifp->if_input)(ifp, m);
			}
#else
			(*ifp->if_input)(ifp, m);
#endif
			XN_RX_LOCK(np);
		}

		np->rx.rsp_cons = i;

#if __FreeBSD_version >= 700000
		/*
		 * Flush any outstanding LRO work
		 */
		while (!SLIST_EMPTY(&lro->lro_active)) {
			queued = SLIST_FIRST(&lro->lro_active);
			SLIST_REMOVE_HEAD(&lro->lro_active, next);
			tcp_lro_flush(lro, queued);
		}
#endif

#if 0
		/* If we get a callback with very few responses, reduce fill target. */
		/* NB. Note exponential increase, linear decrease. */
		if (((np->rx.req_prod_pvt - np->rx.sring->rsp_prod) >
		    ((3*np->rx_target) / 4)) &&
		    (--np->rx_target < np->rx_min_target))
			np->rx_target = np->rx_min_target;
#endif

		network_alloc_rx_buffers(np);

		RING_FINAL_CHECK_FOR_RESPONSES(&np->rx, work_to_do);
	} while (work_to_do);
}

static void
xn_txeof(struct netfront_info *np)
{
	RING_IDX i, prod;
	unsigned short id;
	struct ifnet *ifp;
	netif_tx_response_t *txr;
	struct mbuf *m;

	XN_TX_LOCK_ASSERT(np);

	if (!netfront_carrier_ok(np))
		return;

	ifp = np->xn_ifp;
	ifp->if_timer = 0;

	do {
		prod = np->tx.sring->rsp_prod;
		rmb(); /* Ensure we see responses up to 'prod'. */

		for (i = np->tx.rsp_cons; i != prod; i++) {
			txr = RING_GET_RESPONSE(&np->tx, i);
			if (txr->status == NETIF_RSP_NULL)
				continue;

			id = txr->id;
			m = np->xn_cdata.xn_tx_chain[id];

			KASSERT(m != NULL, ("mbuf not found in xn_tx_chain"));
			M_ASSERTVALID(m);

			/*
			 * Increment packet count if this is the last
			 * mbuf of the chain.
			 */
			if (!m->m_next)
				ifp->if_opackets++;
			if (unlikely(gnttab_query_foreign_access(
			    np->grant_tx_ref[id]) != 0)) {
				printf("network_tx_buf_gc: warning "
				    "-- grant still in use by backend "
				    "domain.\n");
				goto out;
			}
			gnttab_end_foreign_access_ref(
			    np->grant_tx_ref[id]);
			gnttab_release_grant_reference(
			    &np->gref_tx_head, np->grant_tx_ref[id]);
			np->grant_tx_ref[id] = GRANT_INVALID_REF;

			np->xn_cdata.xn_tx_chain[id] = NULL;
			add_id_to_freelist(np->xn_cdata.xn_tx_chain, id);
			m_free(m);
		}
		np->tx.rsp_cons = prod;

		/*
		 * Set a new event, then check for race with update of
		 * tx_cons.  Note that it is essential to schedule a
		 * callback, no matter how few buffers are pending.  Even if
		 * there is space in the transmit ring, higher layers may
		 * be blocked because too much data is outstanding: in such
		 * cases notification from Xen is likely to be the only kick
		 * that we'll get.
		 */
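		/*
		 * For example, with rsp_prod == 10 and req_prod == 18 this
		 * sets rsp_event to 15, i.e. halfway through the currently
		 * outstanding requests, so the backend raises another event
		 * well before the ring drains completely.
		 */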
		np->tx.sring->rsp_event =
		    prod + ((np->tx.sring->req_prod - prod) >> 1) + 1;

		mb();

	} while (prod != np->tx.sring->rsp_prod);

out:
	if (np->tx_full &&
	    ((np->tx.sring->req_prod - prod) < NET_TX_RING_SIZE)) {
		np->tx_full = 0;
#if 0
		if (np->user_state == UST_OPEN)
			netif_wake_queue(dev);
#endif
	}

}

static void
xn_intr(void *xsc)
{
	struct netfront_info *np = xsc;
	struct ifnet *ifp = np->xn_ifp;

#if 0
	if (!(np->rx.rsp_cons != np->rx.sring->rsp_prod &&
	    likely(netfront_carrier_ok(np)) &&
	    ifp->if_drv_flags & IFF_DRV_RUNNING))
		return;
#endif
	if (np->tx.rsp_cons != np->tx.sring->rsp_prod) {
		XN_TX_LOCK(np);
		xn_txeof(np);
		XN_TX_UNLOCK(np);
	}

	XN_RX_LOCK(np);
	xn_rxeof(np);
	XN_RX_UNLOCK(np);

	if (ifp->if_drv_flags & IFF_DRV_RUNNING &&
	    !IFQ_DRV_IS_EMPTY(&ifp->if_snd))
		xn_start(ifp);
}


static void
xennet_move_rx_slot(struct netfront_info *np, struct mbuf *m,
    grant_ref_t ref)
{
	int new = xennet_rxidx(np->rx.req_prod_pvt);

	KASSERT(np->rx_mbufs[new] == NULL, ("rx_mbufs != NULL"));
	np->rx_mbufs[new] = m;
	np->grant_rx_ref[new] = ref;
	RING_GET_REQUEST(&np->rx, np->rx.req_prod_pvt)->id = new;
	RING_GET_REQUEST(&np->rx, np->rx.req_prod_pvt)->gref = ref;
	np->rx.req_prod_pvt++;
}

static int
xennet_get_extras(struct netfront_info *np,
    struct netif_extra_info *extras, RING_IDX rp)
{
	struct netif_extra_info *extra;
	RING_IDX cons = np->rx.rsp_cons;

	int err = 0;

	do {
		struct mbuf *m;
		grant_ref_t ref;

		if (unlikely(cons + 1 == rp)) {
#if 0
			if (net_ratelimit())
				WPRINTK("Missing extra info\n");
#endif
			err = -EINVAL;
			break;
		}

		extra = (struct netif_extra_info *)
		    RING_GET_RESPONSE(&np->rx, ++cons);

		if (unlikely(!extra->type ||
		    extra->type >= XEN_NETIF_EXTRA_TYPE_MAX)) {
#if 0
			if (net_ratelimit())
				WPRINTK("Invalid extra type: %d\n",
				    extra->type);
#endif
			err = -EINVAL;
		} else {
			memcpy(&extras[extra->type - 1], extra, sizeof(*extra));
		}

		m = xennet_get_rx_mbuf(np, cons);
		ref = xennet_get_rx_ref(np, cons);
		xennet_move_rx_slot(np, m, ref);
	} while (extra->flags & XEN_NETIF_EXTRA_FLAG_MORE);

	np->rx.rsp_cons = cons;
	return err;
}

static int
xennet_get_responses(struct netfront_info *np,
    struct netfront_rx_info *rinfo, RING_IDX rp,
    struct mbuf **list,
    int *pages_flipped_p)
{
	int pages_flipped = *pages_flipped_p;
	struct mmu_update *mmu;
	struct multicall_entry *mcl;
	struct netif_rx_response *rx = &rinfo->rx;
	struct netif_extra_info *extras = rinfo->extras;
	RING_IDX cons = np->rx.rsp_cons;
	struct mbuf *m, *m0, *m_prev;
	grant_ref_t ref = xennet_get_rx_ref(np, cons);
	int max = 5 /* MAX_SKB_FRAGS + (rx->status <= RX_COPY_THRESHOLD) */;
	int frags = 1;
	int err = 0;
	u_long ret;

	m0 = m = m_prev = xennet_get_rx_mbuf(np, cons);


	if (rx->flags & NETRXF_extra_info) {
		err = xennet_get_extras(np, extras, rp);
		cons = np->rx.rsp_cons;
	}


	if (m0 != NULL) {
		m0->m_pkthdr.len = 0;
		m0->m_next = NULL;
	}

	for (;;) {
		u_long mfn;

#if 0
printf("rx->status=%hd rx->offset=%hu frags=%u\n", 1231 rx->status, rx->offset, frags); 1232 #endif 1233 if (unlikely(rx->status < 0 || 1234 rx->offset + rx->status > PAGE_SIZE)) { 1235 #if 0 1236 if (net_ratelimit()) 1237 WPRINTK("rx->offset: %x, size: %u\n", 1238 rx->offset, rx->status); 1239 #endif 1240 xennet_move_rx_slot(np, m, ref); 1241 err = -EINVAL; 1242 goto next; 1243 } 1244 1245 /* 1246 * This definitely indicates a bug, either in this driver or in 1247 * the backend driver. In future this should flag the bad 1248 * situation to the system controller to reboot the backed. 1249 */ 1250 if (ref == GRANT_INVALID_REF) { 1251 #if 0 1252 if (net_ratelimit()) 1253 WPRINTK("Bad rx response id %d.\n", rx->id); 1254 #endif 1255 err = -EINVAL; 1256 goto next; 1257 } 1258 1259 if (!np->copying_receiver) { 1260 /* Memory pressure, insufficient buffer 1261 * headroom, ... 1262 */ 1263 if (!(mfn = gnttab_end_foreign_transfer_ref(ref))) { 1264 if (net_ratelimit()) 1265 WPRINTK("Unfulfilled rx req " 1266 "(id=%d, st=%d).\n", 1267 rx->id, rx->status); 1268 xennet_move_rx_slot(np, m, ref); 1269 err = -ENOMEM; 1270 goto next; 1271 } 1272 1273 if (!xen_feature( XENFEAT_auto_translated_physmap)) { 1274 /* Remap the page. */ 1275 void *vaddr = mtod(m, void *); 1276 uint32_t pfn; 1277 1278 mcl = np->rx_mcl + pages_flipped; 1279 mmu = np->rx_mmu + pages_flipped; 1280 1281 MULTI_update_va_mapping(mcl, (u_long)vaddr, 1282 (((vm_paddr_t)mfn) << PAGE_SHIFT) | PG_RW | 1283 PG_V | PG_M | PG_A, 0); 1284 pfn = (uintptr_t)m->m_ext.ext_arg1; 1285 mmu->ptr = ((vm_paddr_t)mfn << PAGE_SHIFT) | 1286 MMU_MACHPHYS_UPDATE; 1287 mmu->val = pfn; 1288 1289 set_phys_to_machine(pfn, mfn); 1290 } 1291 pages_flipped++; 1292 } else { 1293 ret = gnttab_end_foreign_access_ref(ref); 1294 KASSERT(ret, ("ret != 0")); 1295 } 1296 1297 gnttab_release_grant_reference(&np->gref_rx_head, ref); 1298 1299 next: 1300 if (m != NULL) { 1301 m->m_len = rx->status; 1302 m->m_data += rx->offset; 1303 m0->m_pkthdr.len += rx->status; 1304 } 1305 1306 if (!(rx->flags & NETRXF_more_data)) 1307 break; 1308 1309 if (cons + frags == rp) { 1310 if (net_ratelimit()) 1311 WPRINTK("Need more frags\n"); 1312 err = -ENOENT; 1313 break; 1314 } 1315 m_prev = m; 1316 1317 rx = RING_GET_RESPONSE(&np->rx, cons + frags); 1318 m = xennet_get_rx_mbuf(np, cons + frags); 1319 1320 m_prev->m_next = m; 1321 m->m_next = NULL; 1322 ref = xennet_get_rx_ref(np, cons + frags); 1323 frags++; 1324 } 1325 *list = m0; 1326 1327 if (unlikely(frags > max)) { 1328 if (net_ratelimit()) 1329 WPRINTK("Too many frags\n"); 1330 err = -E2BIG; 1331 } 1332 1333 if (unlikely(err)) 1334 np->rx.rsp_cons = cons + frags; 1335 1336 *pages_flipped_p = pages_flipped; 1337 1338 return err; 1339 } 1340 1341 static void 1342 xn_tick_locked(struct netfront_info *sc) 1343 { 1344 XN_RX_LOCK_ASSERT(sc); 1345 callout_reset(&sc->xn_stat_ch, hz, xn_tick, sc); 1346 1347 /* XXX placeholder for printing debug information */ 1348 1349 } 1350 1351 1352 static void 1353 xn_tick(void *xsc) 1354 { 1355 struct netfront_info *sc; 1356 1357 sc = xsc; 1358 XN_RX_LOCK(sc); 1359 xn_tick_locked(sc); 1360 XN_RX_UNLOCK(sc); 1361 1362 } 1363 static void 1364 xn_start_locked(struct ifnet *ifp) 1365 { 1366 int otherend_id; 1367 unsigned short id; 1368 struct mbuf *m_head, *m; 1369 struct netfront_info *sc; 1370 netif_tx_request_t *tx; 1371 netif_extra_info_t *extra; 1372 RING_IDX i; 1373 grant_ref_t ref; 1374 u_long mfn, tx_bytes; 1375 int notify, nfrags; 1376 1377 sc = ifp->if_softc; 1378 otherend_id = 
static void
xn_start_locked(struct ifnet *ifp)
{
	int otherend_id;
	unsigned short id;
	struct mbuf *m_head, *m;
	struct netfront_info *sc;
	netif_tx_request_t *tx;
	netif_extra_info_t *extra;
	RING_IDX i;
	grant_ref_t ref;
	u_long mfn, tx_bytes;
	int notify, nfrags;

	sc = ifp->if_softc;
	otherend_id = xenbus_get_otherend_id(sc->xbdev);
	tx_bytes = 0;

	if (!netfront_carrier_ok(sc))
		return;

	for (i = sc->tx.req_prod_pvt; TRUE; i++) {
		IF_DEQUEUE(&ifp->if_snd, m_head);
		if (m_head == NULL)
			break;

		if (!netfront_tx_slot_available(sc)) {
			IF_PREPEND(&ifp->if_snd, m_head);
			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
			break;
		}


		/*
		 * Defragment the mbuf if necessary.
		 */
		for (m = m_head, nfrags = 0; m; m = m->m_next)
			nfrags++;
		if (nfrags > MAX_SKB_FRAGS) {
			m = m_defrag(m_head, M_DONTWAIT);
			if (!m) {
				m_freem(m_head);
				break;
			}
			m_head = m;
		}

		/*
		 * Start packing the mbufs in this chain into
		 * the fragment pointers.  Stop when we run out
		 * of fragments or hit the end of the mbuf chain.
		 */
		m = m_head;
		extra = NULL;
		for (m = m_head; m; m = m->m_next) {
			tx = RING_GET_REQUEST(&sc->tx, i);
			id = get_id_from_freelist(sc->xn_cdata.xn_tx_chain);
			sc->xn_cdata.xn_tx_chain[id] = m;
			tx->id = id;
			ref = gnttab_claim_grant_reference(&sc->gref_tx_head);
			KASSERT((short)ref >= 0, ("Negative ref"));
			mfn = virt_to_mfn(mtod(m, vm_offset_t));
			gnttab_grant_foreign_access_ref(ref, otherend_id,
			    mfn, GNTMAP_readonly);
			tx->gref = sc->grant_tx_ref[id] = ref;
			tx->offset = mtod(m, vm_offset_t) & (PAGE_SIZE - 1);
			tx->flags = 0;
			if (m == m_head) {
				/*
				 * The first fragment has the entire packet
				 * size, subsequent fragments have just the
				 * fragment size.  The backend works out the
				 * true size of the first fragment by
				 * subtracting the sizes of the other
				 * fragments.
				 */
				tx->size = m->m_pkthdr.len;

				/*
				 * The first fragment contains the
				 * checksum flags and is optionally
				 * followed by extra data for TSO etc.
				 */
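				/*
				 * CSUM_DELAY_DATA means the stack has left
				 * the checksum for the hardware to compute;
				 * NETTXF_csum_blank tells the backend the
				 * checksum field still has to be filled in,
				 * and NETTXF_data_validated that the payload
				 * itself can be trusted.
				 */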
				if (m->m_pkthdr.csum_flags
				    & CSUM_DELAY_DATA) {
					tx->flags |= (NETTXF_csum_blank
					    | NETTXF_data_validated);
				}
#if __FreeBSD_version >= 700000
				if (m->m_pkthdr.csum_flags & CSUM_TSO) {
					struct netif_extra_info *gso =
					    (struct netif_extra_info *)
					    RING_GET_REQUEST(&sc->tx, ++i);

					if (extra)
						extra->flags |= XEN_NETIF_EXTRA_FLAG_MORE;
					else
						tx->flags |= NETTXF_extra_info;

					gso->u.gso.size = m->m_pkthdr.tso_segsz;
					gso->u.gso.type =
					    XEN_NETIF_GSO_TYPE_TCPV4;
					gso->u.gso.pad = 0;
					gso->u.gso.features = 0;

					gso->type = XEN_NETIF_EXTRA_TYPE_GSO;
					gso->flags = 0;
					extra = gso;
				}
#endif
			} else {
				tx->size = m->m_len;
			}
			if (m->m_next) {
				tx->flags |= NETTXF_more_data;
				i++;
			}
		}

		BPF_MTAP(ifp, m_head);

		sc->stats.tx_bytes += m_head->m_pkthdr.len;
		sc->stats.tx_packets++;
	}

	sc->tx.req_prod_pvt = i;
	RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&sc->tx, notify);
	if (notify)
		notify_remote_via_irq(sc->irq);

	xn_txeof(sc);

	if (RING_FULL(&sc->tx)) {
		sc->tx_full = 1;
#if 0
		netif_stop_queue(dev);
#endif
	}

	return;
}

static void
xn_start(struct ifnet *ifp)
{
	struct netfront_info *sc;

	sc = ifp->if_softc;
	XN_TX_LOCK(sc);
	xn_start_locked(ifp);
	XN_TX_UNLOCK(sc);
}

/* equivalent of network_open() in Linux */
static void
xn_ifinit_locked(struct netfront_info *sc)
{
	struct ifnet *ifp;

	XN_LOCK_ASSERT(sc);

	ifp = sc->xn_ifp;

	if (ifp->if_drv_flags & IFF_DRV_RUNNING)
		return;

	xn_stop(sc);

	network_alloc_rx_buffers(sc);
	sc->rx.sring->rsp_event = sc->rx.rsp_cons + 1;

	ifp->if_drv_flags |= IFF_DRV_RUNNING;
	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;

	callout_reset(&sc->xn_stat_ch, hz, xn_tick, sc);

}


static void
xn_ifinit(void *xsc)
{
	struct netfront_info *sc = xsc;

	XN_LOCK(sc);
	xn_ifinit_locked(sc);
	XN_UNLOCK(sc);

}


static int
xn_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
	struct netfront_info *sc = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *) data;
	struct ifaddr *ifa = (struct ifaddr *)data;

	int mask, error = 0;
	switch(cmd) {
	case SIOCSIFADDR:
	case SIOCGIFADDR:
		XN_LOCK(sc);
		if (ifa->ifa_addr->sa_family == AF_INET) {
			ifp->if_flags |= IFF_UP;
			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
				xn_ifinit_locked(sc);
			arp_ifinit(ifp, ifa);
			XN_UNLOCK(sc);
		} else {
			XN_UNLOCK(sc);
			error = ether_ioctl(ifp, cmd, data);
		}
		break;
	case SIOCSIFMTU:
		/* XXX can we alter the MTU on a VN ? */
#ifdef notyet
		if (ifr->ifr_mtu > XN_JUMBO_MTU)
			error = EINVAL;
		else
#endif
		{
			ifp->if_mtu = ifr->ifr_mtu;
			ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
			xn_ifinit(sc);
		}
		break;
	case SIOCSIFFLAGS:
		XN_LOCK(sc);
		if (ifp->if_flags & IFF_UP) {
			/*
			 * If only the state of the PROMISC flag changed,
			 * then just use the 'set promisc mode' command
			 * instead of reinitializing the entire NIC.  Doing
			 * a full re-init means reloading the firmware and
			 * waiting for it to start up, which may take a
			 * second or two.
			 */
#ifdef notyet
			/* No promiscuous mode with Xen */
			if (ifp->if_drv_flags & IFF_DRV_RUNNING &&
			    ifp->if_flags & IFF_PROMISC &&
			    !(sc->xn_if_flags & IFF_PROMISC)) {
				XN_SETBIT(sc, XN_RX_MODE,
				    XN_RXMODE_RX_PROMISC);
			} else if (ifp->if_drv_flags & IFF_DRV_RUNNING &&
			    !(ifp->if_flags & IFF_PROMISC) &&
			    sc->xn_if_flags & IFF_PROMISC) {
				XN_CLRBIT(sc, XN_RX_MODE,
				    XN_RXMODE_RX_PROMISC);
			} else
#endif
				xn_ifinit_locked(sc);
		} else {
			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
				xn_stop(sc);
			}
		}
		sc->xn_if_flags = ifp->if_flags;
		XN_UNLOCK(sc);
		error = 0;
		break;
	case SIOCSIFCAP:
		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
		if (mask & IFCAP_TXCSUM) {
			if (IFCAP_TXCSUM & ifp->if_capenable) {
				ifp->if_capenable &= ~(IFCAP_TXCSUM|IFCAP_TSO4);
				ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP
				    | CSUM_IP | CSUM_TSO);
			} else {
				ifp->if_capenable |= IFCAP_TXCSUM;
				ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP
				    | CSUM_IP);
			}
		}
		if (mask & IFCAP_RXCSUM) {
			ifp->if_capenable ^= IFCAP_RXCSUM;
		}
#if __FreeBSD_version >= 700000
		if (mask & IFCAP_TSO4) {
			if (IFCAP_TSO4 & ifp->if_capenable) {
				ifp->if_capenable &= ~IFCAP_TSO4;
				ifp->if_hwassist &= ~CSUM_TSO;
			} else if (IFCAP_TXCSUM & ifp->if_capenable) {
				ifp->if_capenable |= IFCAP_TSO4;
				ifp->if_hwassist |= CSUM_TSO;
			} else {
				DPRINTK("Xen requires tx checksum offload"
				    " be enabled to use TSO\n");
				error = EINVAL;
			}
		}
		if (mask & IFCAP_LRO) {
			ifp->if_capenable ^= IFCAP_LRO;

		}
#endif
		error = 0;
		break;
	case SIOCADDMULTI:
	case SIOCDELMULTI:
#ifdef notyet
		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
			XN_LOCK(sc);
			xn_setmulti(sc);
			XN_UNLOCK(sc);
			error = 0;
		}
#endif
		/* FALLTHROUGH */
	case SIOCSIFMEDIA:
	case SIOCGIFMEDIA:
		error = EINVAL;
		break;
	default:
		error = ether_ioctl(ifp, cmd, data);
	}

	return (error);
}

static void
xn_stop(struct netfront_info *sc)
{
	struct ifnet *ifp;

	XN_LOCK_ASSERT(sc);

	ifp = sc->xn_ifp;

	callout_stop(&sc->xn_stat_ch);

	xn_free_rx_ring(sc);
	xn_free_tx_ring(sc);

	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
}

/* START of Xenolinux helper functions adapted to FreeBSD */
int
network_connect(struct netfront_info *np)
{
	int i, requeue_idx, error;
	grant_ref_t ref;
	netif_rx_request_t *req;
	u_int feature_rx_copy, feature_rx_flip;

	error = xenbus_scanf(XBT_NIL, xenbus_get_otherend_path(np->xbdev),
	    "feature-rx-copy", NULL, "%u", &feature_rx_copy);
	if (error)
		feature_rx_copy = 0;
	error = xenbus_scanf(XBT_NIL, xenbus_get_otherend_path(np->xbdev),
	    "feature-rx-flip", NULL, "%u", &feature_rx_flip);
	if (error)
		feature_rx_flip = 1;

	/*
	 * Copy packets on receive path if:
	 *  (a) This was requested by user, and the backend supports it; or
	 *  (b) Flipping was requested, but this is unsupported by the backend.
	 */
	np->copying_receiver = ((MODPARM_rx_copy && feature_rx_copy) ||
	    (MODPARM_rx_flip && !feature_rx_flip));

	XN_LOCK(np);
	/* Recovery procedure: */
	error = talk_to_backend(np->xbdev, np);
	if (error) {
		XN_UNLOCK(np);
		return (error);
	}

	/* Step 1: Reinitialise variables. */
	netif_release_tx_bufs(np);

	/* Step 2: Rebuild the RX buffer freelist and the RX ring itself. */
	for (requeue_idx = 0, i = 0; i < NET_RX_RING_SIZE; i++) {
		struct mbuf *m;
		u_long pfn;

		if (np->rx_mbufs[i] == NULL)
			continue;

		m = np->rx_mbufs[requeue_idx] = xennet_get_rx_mbuf(np, i);
		ref = np->grant_rx_ref[requeue_idx] = xennet_get_rx_ref(np, i);
		req = RING_GET_REQUEST(&np->rx, requeue_idx);
		pfn = vtophys(mtod(m, vm_offset_t)) >> PAGE_SHIFT;

		if (!np->copying_receiver) {
			gnttab_grant_foreign_transfer_ref(ref,
			    xenbus_get_otherend_id(np->xbdev),
			    pfn);
		} else {
			gnttab_grant_foreign_access_ref(ref,
			    xenbus_get_otherend_id(np->xbdev),
			    PFNTOMFN(pfn), 0);
		}
		req->gref = ref;
		req->id   = requeue_idx;

		requeue_idx++;
	}

	np->rx.req_prod_pvt = requeue_idx;

	/*
	 * Step 3: All public and private state should now be sane.  Get
	 * ready to start sending and receiving packets and give the driver
	 * domain a kick because we've probably just requeued some
	 * packets.
	 */
	netfront_carrier_on(np);
	notify_remote_via_irq(np->irq);
	XN_TX_LOCK(np);
	xn_txeof(np);
	XN_TX_UNLOCK(np);
	network_alloc_rx_buffers(np);
	XN_UNLOCK(np);

	return (0);
}

static void
show_device(struct netfront_info *sc)
{
#ifdef DEBUG
	if (sc) {
		IPRINTK("<vif handle=%u %s(%s) evtchn=%u irq=%u tx=%p rx=%p>\n",
			sc->xn_ifno,
			be_state_name[sc->xn_backend_state],
			sc->xn_user_state ? "open" : "closed",
			sc->xn_evtchn,
			sc->xn_irq,
			sc->xn_tx_if,
			sc->xn_rx_if);
	} else {
		IPRINTK("<vif NULL>\n");
	}
#endif
}

/** Create a network device.
 * @param handle device handle
 */
int
create_netdev(device_t dev)
{
	int i;
	struct netfront_info *np;
	int err;
	struct ifnet *ifp;

	np = device_get_softc(dev);

	np->xbdev         = dev;

	XN_LOCK_INIT(np, xennetif);
	np->rx_target     = RX_MIN_TARGET;
	np->rx_min_target = RX_MIN_TARGET;
	np->rx_max_target = RX_MAX_TARGET;

	/* Initialise {tx,rx}_skbs to be a free chain containing every entry. */
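	/*
	 * Setting tx_mbufs[i] = i + 1 threads every tx slot onto the free
	 * list described above (slot 0 is the list head), so the first
	 * get_id_from_freelist() call will hand out slot 1.
	 */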
	for (i = 0; i <= NET_TX_RING_SIZE; i++) {
		np->tx_mbufs[i] = (void *) ((u_long) i+1);
		np->grant_tx_ref[i] = GRANT_INVALID_REF;
	}
	for (i = 0; i <= NET_RX_RING_SIZE; i++) {
		np->rx_mbufs[i] = NULL;
		np->grant_rx_ref[i] = GRANT_INVALID_REF;
	}
	/* A grant for every tx ring slot */
	if (gnttab_alloc_grant_references(TX_MAX_TARGET,
	    &np->gref_tx_head) < 0) {
		printf("#### netfront can't alloc tx grant refs\n");
		err = ENOMEM;
		goto exit;
	}
	/* A grant for every rx ring slot */
	if (gnttab_alloc_grant_references(RX_MAX_TARGET,
	    &np->gref_rx_head) < 0) {
		printf("#### netfront can't alloc rx grant refs\n");
		gnttab_free_grant_references(np->gref_tx_head);
		err = ENOMEM;
		goto exit;
	}

	err = xen_net_read_mac(dev, np->mac);
	if (err) {
		xenbus_dev_fatal(dev, err, "parsing %s/mac",
		    xenbus_get_node(dev));
		goto out;
	}

	/* Set up ifnet structure */
	ifp = np->xn_ifp = if_alloc(IFT_ETHER);
	ifp->if_softc = np;
	if_initname(ifp, "xn", device_get_unit(dev));
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
	ifp->if_ioctl = xn_ioctl;
	ifp->if_output = ether_output;
	ifp->if_start = xn_start;
#ifdef notyet
	ifp->if_watchdog = xn_watchdog;
#endif
	ifp->if_init = xn_ifinit;
	ifp->if_mtu = ETHERMTU;
	ifp->if_snd.ifq_maxlen = NET_TX_RING_SIZE - 1;

	ifp->if_hwassist = XN_CSUM_FEATURES;
	ifp->if_capabilities = IFCAP_HWCSUM;
#if __FreeBSD_version >= 700000
	ifp->if_capabilities |= IFCAP_TSO4;
	if (xn_enable_lro) {
		int err = tcp_lro_init(&np->xn_lro);
		if (err) {
			device_printf(dev, "LRO initialization failed\n");
			goto exit;
		}
		np->xn_lro.ifp = ifp;
		ifp->if_capabilities |= IFCAP_LRO;
	}
#endif
	ifp->if_capenable = ifp->if_capabilities;

	ether_ifattach(ifp, np->mac);
	callout_init(&np->xn_stat_ch, CALLOUT_MPSAFE);
	netfront_carrier_off(np);

	return (0);

exit:
	gnttab_free_grant_references(np->gref_tx_head);
out:
	panic("do something smart");

}

/**
 * Handle the change of state of the backend to Closing.  We must delete our
 * device-layer structures now, to ensure that writes are flushed through to
 * the backend.  Once this is done, we can switch to Closed in
 * acknowledgement.
 */
#if 0
static void netfront_closing(device_t dev)
{
#if 0
	struct netfront_info *info = dev->dev_driver_data;

	DPRINTK("netfront_closing: %s removed\n", dev->nodename);

	close_netdev(info);
#endif
	xenbus_switch_state(dev, XenbusStateClosed);
}
#endif

static int netfront_detach(device_t dev)
{
	struct netfront_info *info = device_get_softc(dev);

	DPRINTK("%s\n", xenbus_get_node(dev));

	netif_free(info);

	return 0;
}


static void netif_free(struct netfront_info *info)
{
	netif_disconnect_backend(info);
#if 0
	close_netdev(info);
#endif
}

static void netif_disconnect_backend(struct netfront_info *info)
{
	XN_RX_LOCK(info);
	XN_TX_LOCK(info);
	netfront_carrier_off(info);
	XN_TX_UNLOCK(info);
	XN_RX_UNLOCK(info);

	end_access(info->tx_ring_ref, info->tx.sring);
	end_access(info->rx_ring_ref, info->rx.sring);
	info->tx_ring_ref = GRANT_INVALID_REF;
	info->rx_ring_ref = GRANT_INVALID_REF;
	info->tx.sring = NULL;
	info->rx.sring = NULL;

	if (info->irq)
		unbind_from_irqhandler(info->irq);

	info->irq = 0;
}


static void end_access(int ref, void *page)
{
	if (ref != GRANT_INVALID_REF)
		gnttab_end_foreign_access(ref, page);
}

/* ** Driver registration ** */

static device_method_t netfront_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		netfront_probe),
	DEVMETHOD(device_attach,	netfront_attach),
	DEVMETHOD(device_detach,	netfront_detach),
	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
	DEVMETHOD(device_suspend,	bus_generic_suspend),
	DEVMETHOD(device_resume,	netfront_resume),

	/* Xenbus interface */
	DEVMETHOD(xenbus_backend_changed, netfront_backend_changed),

	{ 0, 0 }
};

static driver_t netfront_driver = {
	"xn",
	netfront_methods,
	sizeof(struct netfront_info),
};
devclass_t netfront_devclass;

DRIVER_MODULE(xe, xenbus, netfront_driver, netfront_devclass, 0, 0);