/*-
 * Copyright (c) 2004-2006 Kip Macy
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_inet.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/sockio.h>
#include <sys/mbuf.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/kernel.h>
#include <sys/socket.h>
#include <sys/sysctl.h>
#include <sys/queue.h>
#include <sys/lock.h>
#include <sys/sx.h>

#include <net/if.h>
#include <net/if_arp.h>
#include <net/ethernet.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/bpf.h>

#include <net/if_types.h>
#include <net/if.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/if_ether.h>
#if __FreeBSD_version >= 700000
#include <netinet/tcp.h>
#include <netinet/tcp_lro.h>
#endif

#include <vm/vm.h>
#include <vm/pmap.h>

#include <machine/clock.h>	/* for DELAY */
#include <machine/bus.h>
#include <machine/resource.h>
#include <machine/frame.h>
#include <machine/vmparam.h>

#include <sys/bus.h>
#include <sys/rman.h>

#include <machine/intr_machdep.h>

#include <machine/xen/xen-os.h>
#include <machine/xen/xenfunc.h>
#include <machine/xen/xenvar.h>
#include <xen/hypervisor.h>
#include <xen/xen_intr.h>
#include <xen/evtchn.h>
#include <xen/gnttab.h>
#include <xen/interface/memory.h>
#include <xen/interface/io/netif.h>
#include <xen/xenbus/xenbusvar.h>

#include <dev/xen/netfront/mbufq.h>

#include "xenbus_if.h"

/* Features supported by all backends.  TSO and LRO can be negotiated. */
#define XN_CSUM_FEATURES	(CSUM_TCP | CSUM_UDP)

#define NET_TX_RING_SIZE __RING_SIZE((netif_tx_sring_t *)0, PAGE_SIZE)
#define NET_RX_RING_SIZE __RING_SIZE((netif_rx_sring_t *)0, PAGE_SIZE)

#if __FreeBSD_version >= 700000
/*
 * Should the driver do LRO on the RX end?  This can be toggled on the fly,
 * but the interface must be reset (down/up) for it to take effect.
 */
static int xn_enable_lro = 1;
TUNABLE_INT("hw.xn.enable_lro", &xn_enable_lro);
#else

#define IFCAP_TSO4 0
#define CSUM_TSO 0

#endif

#ifdef CONFIG_XEN
static int MODPARM_rx_copy = 0;
module_param_named(rx_copy, MODPARM_rx_copy, bool, 0);
MODULE_PARM_DESC(rx_copy, "Copy packets from network card (rather than flip)");
static int MODPARM_rx_flip = 0;
module_param_named(rx_flip, MODPARM_rx_flip, bool, 0);
MODULE_PARM_DESC(rx_flip, "Flip packets from network card (rather than copy)");
#else
static const int MODPARM_rx_copy = 1;
static const int MODPARM_rx_flip = 0;
#endif

/**
 * \brief The maximum allowed data fragments in a single transmit
 *        request.
 *
 * This limit is imposed by the backend driver.  We assume here that
 * we are dealing with a Linux driver domain and have set our limit
 * to mirror the Linux MAX_SKB_FRAGS constant.
 */
#define MAX_TX_REQ_FRAGS (65536 / PAGE_SIZE + 2)
#define NF_TSO_MAXBURST ((IP_MAXPACKET / PAGE_SIZE) * MCLBYTES)

#define RX_COPY_THRESHOLD 256

#define net_ratelimit() 0

struct netfront_info;
struct netfront_rx_info;

static void xn_txeof(struct netfront_info *);
static void xn_rxeof(struct netfront_info *);
static void network_alloc_rx_buffers(struct netfront_info *);

static void xn_tick_locked(struct netfront_info *);
static void xn_tick(void *);

static void xn_intr(void *);
static inline int xn_count_frags(struct mbuf *m);
static int xn_assemble_tx_request(struct netfront_info *sc,
    struct mbuf *m_head);
static void xn_start_locked(struct ifnet *);
static void xn_start(struct ifnet *);
static int xn_ioctl(struct ifnet *, u_long, caddr_t);
static void xn_ifinit_locked(struct netfront_info *);
static void xn_ifinit(void *);
static void xn_stop(struct netfront_info *);
static void xn_query_features(struct netfront_info *np);
static int xn_configure_features(struct netfront_info *np);
#ifdef notyet
static void xn_watchdog(struct ifnet *);
#endif

static void show_device(struct netfront_info *sc);
#ifdef notyet
static void netfront_closing(device_t dev);
#endif
static void netif_free(struct netfront_info *info);
static int netfront_detach(device_t dev);

static int talk_to_backend(device_t dev, struct netfront_info *info);
static int create_netdev(device_t dev);
static void netif_disconnect_backend(struct netfront_info *info);
static int setup_device(device_t dev, struct netfront_info *info);
static void free_ring(int *ref, void *ring_ptr_ref);

static int  xn_ifmedia_upd(struct ifnet *ifp);
static void xn_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr);

/* Xenolinux helper functions */
int network_connect(struct netfront_info *);

static void xn_free_rx_ring(struct netfront_info *);

static void xn_free_tx_ring(struct netfront_info *);

static int xennet_get_responses(struct netfront_info *np,
    struct netfront_rx_info *rinfo, RING_IDX rp, RING_IDX *cons,
    struct mbuf **list, int *pages_flipped_p);

#define virt_to_mfn(x) (vtomach(x) >> PAGE_SHIFT)

#define INVALID_P2M_ENTRY (~0UL)

/*
 * Mbuf pointers.  We need these to keep track of the virtual addresses
 * of our mbuf chains since we can only convert from virtual to physical,
 * not the other way around.  The size must track the free index arrays.
 */
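/*
 * The tx chain doubles as a free list: entry 0 is used as the free-list
 * head (see add_id_to_freelist() below) and valid tx ids run from 1 to
 * NET_TX_RING_SIZE, hence the extra array element.  The rx chain is
 * indexed directly via xennet_rxidx().
 */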
struct xn_chain_data {
	struct mbuf	*xn_tx_chain[NET_TX_RING_SIZE+1];
	int		xn_tx_chain_cnt;
	struct mbuf	*xn_rx_chain[NET_RX_RING_SIZE+1];
};

struct net_device_stats
{
	u_long	rx_packets;		/* total packets received	*/
	u_long	tx_packets;		/* total packets transmitted	*/
	u_long	rx_bytes;		/* total bytes received		*/
	u_long	tx_bytes;		/* total bytes transmitted	*/
	u_long	rx_errors;		/* bad packets received		*/
	u_long	tx_errors;		/* packet transmit problems	*/
	u_long	rx_dropped;		/* no space in linux buffers	*/
	u_long	tx_dropped;		/* no space available in linux	*/
	u_long	multicast;		/* multicast packets received	*/
	u_long	collisions;

	/* detailed rx_errors: */
	u_long	rx_length_errors;
	u_long	rx_over_errors;		/* receiver ring buff overflow	*/
	u_long	rx_crc_errors;		/* recved pkt with crc error	*/
	u_long	rx_frame_errors;	/* recv'd frame alignment error	*/
	u_long	rx_fifo_errors;		/* recv'r fifo overrun		*/
	u_long	rx_missed_errors;	/* receiver missed packet	*/

	/* detailed tx_errors */
	u_long	tx_aborted_errors;
	u_long	tx_carrier_errors;
	u_long	tx_fifo_errors;
	u_long	tx_heartbeat_errors;
	u_long	tx_window_errors;

	/* for cslip etc */
	u_long	rx_compressed;
	u_long	tx_compressed;
};

struct netfront_info {
	struct ifnet *xn_ifp;
#if __FreeBSD_version >= 700000
	struct lro_ctrl xn_lro;
#endif

	struct net_device_stats stats;
	u_int tx_full;

	netif_tx_front_ring_t tx;
	netif_rx_front_ring_t rx;

	struct mtx   tx_lock;
	struct mtx   rx_lock;
	struct mtx   sc_lock;

	u_int handle;
	u_int irq;
	u_int copying_receiver;
	u_int carrier;
	u_int maxfrags;

	/* Receive-ring batched refills. */
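	/*
	 * The driver tries to keep rx_target receive buffers posted to
	 * the backend at all times.  network_alloc_rx_buffers() doubles
	 * the target (capped at rx_max_target) whenever the ring looks
	 * close to running dry.
	 */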
#define RX_MIN_TARGET 32
#define RX_MAX_TARGET NET_RX_RING_SIZE
	int rx_min_target;
	int rx_max_target;
	int rx_target;

	grant_ref_t gref_tx_head;
	grant_ref_t grant_tx_ref[NET_TX_RING_SIZE + 1];
	grant_ref_t gref_rx_head;
	grant_ref_t grant_rx_ref[NET_TX_RING_SIZE + 1];

	device_t		xbdev;
	int			tx_ring_ref;
	int			rx_ring_ref;
	uint8_t			mac[ETHER_ADDR_LEN];
	struct xn_chain_data	xn_cdata;	/* mbufs */
	struct mbuf_head	xn_rx_batch;	/* head of the batch queue */

	int			xn_if_flags;
	struct callout		xn_stat_ch;

	u_long			rx_pfn_array[NET_RX_RING_SIZE];
	multicall_entry_t	rx_mcl[NET_RX_RING_SIZE+1];
	mmu_update_t		rx_mmu[NET_RX_RING_SIZE];
	struct ifmedia		sc_media;
};

#define rx_mbufs xn_cdata.xn_rx_chain
#define tx_mbufs xn_cdata.xn_tx_chain

#define XN_LOCK_INIT(_sc, _name) \
	mtx_init(&(_sc)->tx_lock, #_name"_tx", "network transmit lock", MTX_DEF); \
	mtx_init(&(_sc)->rx_lock, #_name"_rx", "network receive lock", MTX_DEF);  \
	mtx_init(&(_sc)->sc_lock, #_name"_sc", "netfront softc lock", MTX_DEF)

#define XN_RX_LOCK(_sc)           mtx_lock(&(_sc)->rx_lock)
#define XN_RX_UNLOCK(_sc)         mtx_unlock(&(_sc)->rx_lock)

#define XN_TX_LOCK(_sc)           mtx_lock(&(_sc)->tx_lock)
#define XN_TX_UNLOCK(_sc)         mtx_unlock(&(_sc)->tx_lock)

#define XN_LOCK(_sc)              mtx_lock(&(_sc)->sc_lock);
#define XN_UNLOCK(_sc)            mtx_unlock(&(_sc)->sc_lock);

#define XN_LOCK_ASSERT(_sc)       mtx_assert(&(_sc)->sc_lock, MA_OWNED);
#define XN_RX_LOCK_ASSERT(_sc)    mtx_assert(&(_sc)->rx_lock, MA_OWNED);
#define XN_TX_LOCK_ASSERT(_sc)    mtx_assert(&(_sc)->tx_lock, MA_OWNED);
#define XN_LOCK_DESTROY(_sc)      mtx_destroy(&(_sc)->rx_lock); \
				  mtx_destroy(&(_sc)->tx_lock); \
				  mtx_destroy(&(_sc)->sc_lock);

struct netfront_rx_info {
	struct netif_rx_response rx;
	struct netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1];
};

#define netfront_carrier_on(netif)	((netif)->carrier = 1)
#define netfront_carrier_off(netif)	((netif)->carrier = 0)
#define netfront_carrier_ok(netif)	((netif)->carrier)

/* Access macros for acquiring/freeing slots in xn_free_{tx,rx}_idxs[]. */

static inline void
add_id_to_freelist(struct mbuf **list, uintptr_t id)
{
	KASSERT(id != 0,
		("%s: the head item (0) must always be free.", __func__));
	list[id] = list[0];
	list[0]  = (struct mbuf *)id;
}

static inline unsigned short
get_id_from_freelist(struct mbuf **list)
{
	uintptr_t id;

	id = (uintptr_t)list[0];
	KASSERT(id != 0,
		("%s: the head item (0) must always remain free.", __func__));
	list[0] = list[id];
	return (id);
}

static inline int
xennet_rxidx(RING_IDX idx)
{
	return idx & (NET_RX_RING_SIZE - 1);
}

static inline struct mbuf *
xennet_get_rx_mbuf(struct netfront_info *np, RING_IDX ri)
{
	int i = xennet_rxidx(ri);
	struct mbuf *m;

	m = np->rx_mbufs[i];
	np->rx_mbufs[i] = NULL;
	return (m);
}

static inline grant_ref_t
xennet_get_rx_ref(struct netfront_info *np, RING_IDX ri)
{
	int i = xennet_rxidx(ri);
	grant_ref_t ref = np->grant_rx_ref[i];
	KASSERT(ref != GRANT_REF_INVALID, ("Invalid grant reference!\n"));
	np->grant_rx_ref[i] = GRANT_REF_INVALID;
	return ref;
}

#define IPRINTK(fmt, args...) \
    printf("[XEN] " fmt, ##args)
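/*
 * Three levels of console chatter: IPRINTK is always compiled in,
 * WPRINTK only under INVARIANTS, and DPRINTK only under DEBUG.
 */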
#ifdef INVARIANTS
#define WPRINTK(fmt, args...) \
    printf("[XEN] " fmt, ##args)
#else
#define WPRINTK(fmt, args...)
#endif
#ifdef DEBUG
#define DPRINTK(fmt, args...) \
    printf("[XEN] %s: " fmt, __func__, ##args)
#else
#define DPRINTK(fmt, args...)
#endif

/**
 * Read the 'mac' node at the given device's node in the store, and parse
 * that as colon-separated octets, placing the result in the given mac
 * array.  mac must be a preallocated array of length ETHER_ADDR_LEN.
 * Return 0 on success, or errno on error.
 */
static int
xen_net_read_mac(device_t dev, uint8_t mac[])
{
	int error, i;
	char *s, *e, *macstr;
	const char *path;

	path = xenbus_get_node(dev);
	error = xs_read(XST_NIL, path, "mac", NULL, (void **) &macstr);
	if (error == ENOENT) {
		/*
		 * Deal with missing mac XenStore nodes on devices with
		 * HVM emulation (the 'ioemu' configuration attribute)
		 * enabled.
		 *
		 * The HVM emulator may execute in a stub device model
		 * domain which lacks the permission, only given to Dom0,
		 * to update the guest's XenStore tree.  For this reason,
		 * the HVM emulator doesn't even attempt to write the
		 * front-side mac node, even when operating in Dom0.
		 * However, there should always be a mac listed in the
		 * backend tree.  Fall back to this version if our query
		 * of the front side XenStore location doesn't find
		 * anything.
		 */
		path = xenbus_get_otherend_path(dev);
		error = xs_read(XST_NIL, path, "mac", NULL, (void **) &macstr);
	}
	if (error != 0) {
		xenbus_dev_fatal(dev, error, "parsing %s/mac", path);
		return (error);
	}

	s = macstr;
	for (i = 0; i < ETHER_ADDR_LEN; i++) {
		mac[i] = strtoul(s, &e, 16);
		if (s == e || (e[0] != ':' && e[0] != 0)) {
			free(macstr, M_XENBUS);
			return (ENOENT);
		}
		s = &e[1];
	}
	free(macstr, M_XENBUS);
	return (0);
}

/**
 * Entry point to this code when a new device is created.  Allocate the basic
 * structures and the ring buffers for communication with the backend, and
 * inform the backend of the appropriate details for those.  Switch to
 * Connected state.
 */
static int
netfront_probe(device_t dev)
{

	if (!strcmp(xenbus_get_type(dev), "vif")) {
		device_set_desc(dev, "Virtual Network Interface");
		return (0);
	}

	return (ENXIO);
}

static int
netfront_attach(device_t dev)
{
	int err;

	err = create_netdev(dev);
	if (err) {
		xenbus_dev_fatal(dev, err, "creating netdev");
		return (err);
	}

#if __FreeBSD_version >= 700000
	SYSCTL_ADD_INT(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "enable_lro", CTLTYPE_INT|CTLFLAG_RW,
	    &xn_enable_lro, 0, "Large Receive Offload");
#endif

	return (0);
}

static int
netfront_suspend(device_t dev)
{
	struct netfront_info *info = device_get_softc(dev);

	XN_RX_LOCK(info);
	XN_TX_LOCK(info);
	netfront_carrier_off(info);
	XN_TX_UNLOCK(info);
	XN_RX_UNLOCK(info);
	return (0);
}

/**
 * We are reconnecting to the backend, due to a suspend/resume, or a backend
 * driver restart.  We tear down our netif structure and recreate it, but
 * leave the device-layer structures intact so that this is transparent to
 * the rest of the kernel.
 */
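/*
 * Note that this path only tears the connection down; the subsequent
 * reconnect is driven by the xenbus state machine and ends up in
 * network_connect() via netfront_backend_changed().
 */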
static int
netfront_resume(device_t dev)
{
	struct netfront_info *info = device_get_softc(dev);

	netif_disconnect_backend(info);
	return (0);
}

/* Common code used when first setting up, and when resuming. */
static int
talk_to_backend(device_t dev, struct netfront_info *info)
{
	const char *message;
	struct xs_transaction xst;
	const char *node = xenbus_get_node(dev);
	int err;

	err = xen_net_read_mac(dev, info->mac);
	if (err) {
		xenbus_dev_fatal(dev, err, "parsing %s/mac", node);
		goto out;
	}

	/* Create shared ring, alloc event channel. */
	err = setup_device(dev, info);
	if (err)
		goto out;

 again:
	err = xs_transaction_start(&xst);
	if (err) {
		xenbus_dev_fatal(dev, err, "starting transaction");
		goto destroy_ring;
	}
	err = xs_printf(xst, node, "tx-ring-ref","%u",
			info->tx_ring_ref);
	if (err) {
		message = "writing tx ring-ref";
		goto abort_transaction;
	}
	err = xs_printf(xst, node, "rx-ring-ref","%u",
			info->rx_ring_ref);
	if (err) {
		message = "writing rx ring-ref";
		goto abort_transaction;
	}
	err = xs_printf(xst, node,
			"event-channel", "%u", irq_to_evtchn_port(info->irq));
	if (err) {
		message = "writing event-channel";
		goto abort_transaction;
	}
	err = xs_printf(xst, node, "request-rx-copy", "%u",
			info->copying_receiver);
	if (err) {
		message = "writing request-rx-copy";
		goto abort_transaction;
	}
	err = xs_printf(xst, node, "feature-rx-notify", "%d", 1);
	if (err) {
		message = "writing feature-rx-notify";
		goto abort_transaction;
	}
	err = xs_printf(xst, node, "feature-sg", "%d", 1);
	if (err) {
		message = "writing feature-sg";
		goto abort_transaction;
	}
#if __FreeBSD_version >= 700000
	err = xs_printf(xst, node, "feature-gso-tcpv4", "%d", 1);
	if (err) {
		message = "writing feature-gso-tcpv4";
		goto abort_transaction;
	}
#endif

	err = xs_transaction_end(xst, 0);
	if (err) {
		if (err == EAGAIN)
			goto again;
		xenbus_dev_fatal(dev, err, "completing transaction");
		goto destroy_ring;
	}

	return 0;

 abort_transaction:
	xs_transaction_end(xst, 1);
	xenbus_dev_fatal(dev, err, "%s", message);
 destroy_ring:
	netif_free(info);
 out:
	return err;
}

static int
setup_device(device_t dev, struct netfront_info *info)
{
	netif_tx_sring_t *txs;
	netif_rx_sring_t *rxs;
	int error;
	struct ifnet *ifp;

	ifp = info->xn_ifp;

	info->tx_ring_ref = GRANT_REF_INVALID;
	info->rx_ring_ref = GRANT_REF_INVALID;
	info->rx.sring = NULL;
	info->tx.sring = NULL;
	info->irq = 0;

	txs = (netif_tx_sring_t *)malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT|M_ZERO);
	if (!txs) {
		error = ENOMEM;
		xenbus_dev_fatal(dev, error, "allocating tx ring page");
		goto fail;
	}
	SHARED_RING_INIT(txs);
	FRONT_RING_INIT(&info->tx, txs, PAGE_SIZE);
	error = xenbus_grant_ring(dev, virt_to_mfn(txs), &info->tx_ring_ref);
	if (error)
		goto fail;

	rxs = (netif_rx_sring_t *)malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT|M_ZERO);
	if (!rxs) {
		error = ENOMEM;
		xenbus_dev_fatal(dev, error, "allocating rx ring page");
		goto fail;
	}
	SHARED_RING_INIT(rxs);
	FRONT_RING_INIT(&info->rx, rxs, PAGE_SIZE);

	error = xenbus_grant_ring(dev, virt_to_mfn(rxs), &info->rx_ring_ref);
	if (error)
		goto fail;

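	/*
	 * Allocate an event channel the backend domain can bind to and
	 * hook it up to xn_intr(); talk_to_backend() later publishes the
	 * port number to the backend as the "event-channel" node.
	 */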
	error = bind_listening_port_to_irqhandler(xenbus_get_otherend_id(dev),
	    "xn", xn_intr, info, INTR_TYPE_NET | INTR_MPSAFE, &info->irq);

	if (error) {
		xenbus_dev_fatal(dev, error,
				 "bind_evtchn_to_irqhandler failed");
		goto fail;
	}

	show_device(info);

	return (0);

 fail:
	netif_free(info);
	return (error);
}

#ifdef INET
/**
 * If this interface has an ipv4 address, send an arp for it.  This
 * helps to get the network going again after migrating hosts.
 */
static void
netfront_send_fake_arp(device_t dev, struct netfront_info *info)
{
	struct ifnet *ifp;
	struct ifaddr *ifa;

	ifp = info->xn_ifp;
	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
		if (ifa->ifa_addr->sa_family == AF_INET) {
			arp_ifinit(ifp, ifa);
		}
	}
}
#endif

/**
 * Callback received when the backend's state changes.
 */
static void
netfront_backend_changed(device_t dev, XenbusState newstate)
{
	struct netfront_info *sc = device_get_softc(dev);

	DPRINTK("newstate=%d\n", newstate);

	switch (newstate) {
	case XenbusStateInitialising:
	case XenbusStateInitialised:
	case XenbusStateConnected:
	case XenbusStateUnknown:
	case XenbusStateClosed:
	case XenbusStateReconfigured:
	case XenbusStateReconfiguring:
		break;
	case XenbusStateInitWait:
		if (xenbus_get_state(dev) != XenbusStateInitialising)
			break;
		if (network_connect(sc) != 0)
			break;
		xenbus_set_state(dev, XenbusStateConnected);
#ifdef INET
		netfront_send_fake_arp(dev, sc);
#endif
		break;
	case XenbusStateClosing:
		xenbus_set_state(dev, XenbusStateClosed);
		break;
	}
}

static void
xn_free_rx_ring(struct netfront_info *sc)
{
#if 0
	int i;

	for (i = 0; i < NET_RX_RING_SIZE; i++) {
		if (sc->xn_cdata.rx_mbufs[i] != NULL) {
			m_freem(sc->rx_mbufs[i]);
			sc->rx_mbufs[i] = NULL;
		}
	}

	sc->rx.rsp_cons = 0;
	sc->xn_rx_if->req_prod = 0;
	sc->xn_rx_if->event = sc->rx.rsp_cons;
#endif
}

static void
xn_free_tx_ring(struct netfront_info *sc)
{
#if 0
	int i;

	for (i = 0; i < NET_TX_RING_SIZE; i++) {
		if (sc->tx_mbufs[i] != NULL) {
			m_freem(sc->tx_mbufs[i]);
			sc->xn_cdata.xn_tx_chain[i] = NULL;
		}
	}

	return;
#endif
}

/**
 * \brief Verify that there is sufficient space in the Tx ring
 *        buffer for a maximally sized request to be enqueued.
 *
 * A transmit request requires a transmit descriptor for each packet
 * fragment, plus up to 2 entries for "options" (e.g. TSO).
 */
static inline int
xn_tx_slot_available(struct netfront_info *np)
{
	return (RING_FREE_REQUESTS(&np->tx) > (MAX_TX_REQ_FRAGS + 2));
}

static void
netif_release_tx_bufs(struct netfront_info *np)
{
	int i;

	for (i = 1; i <= NET_TX_RING_SIZE; i++) {
		struct mbuf *m;

		m = np->tx_mbufs[i];

		/*
		 * We assume that no kernel addresses are
		 * less than NET_TX_RING_SIZE.  Any entry
		 * in the table that is below this number
		 * must be an index from free-list tracking.
		 */
		if (((uintptr_t)m) <= NET_TX_RING_SIZE)
			continue;
		gnttab_end_foreign_access_ref(np->grant_tx_ref[i]);
		gnttab_release_grant_reference(&np->gref_tx_head,
		    np->grant_tx_ref[i]);
		np->grant_tx_ref[i] = GRANT_REF_INVALID;
		add_id_to_freelist(np->tx_mbufs, i);
		np->xn_cdata.xn_tx_chain_cnt--;
		if (np->xn_cdata.xn_tx_chain_cnt < 0) {
			panic("%s: tx_chain_cnt must be >= 0", __func__);
		}
		m_free(m);
	}
}

static void
network_alloc_rx_buffers(struct netfront_info *sc)
{
	int otherend_id = xenbus_get_otherend_id(sc->xbdev);
	unsigned short id;
	struct mbuf *m_new;
	int i, batch_target, notify;
	RING_IDX req_prod;
	struct xen_memory_reservation reservation;
	grant_ref_t ref;
	int nr_flips;
	netif_rx_request_t *req;
	vm_offset_t vaddr;
	u_long pfn;

	req_prod = sc->rx.req_prod_pvt;

	if (unlikely(sc->carrier == 0))
		return;

	/*
	 * Allocate mbufs greedily, even though we batch updates to the
	 * receive ring.  This creates a less bursty demand on the memory
	 * allocator, and so should reduce the chance of failed allocation
	 * requests both for ourselves and for other kernel subsystems.
	 *
	 * Here we attempt to maintain rx_target buffers in flight, counting
	 * buffers that we have yet to process in the receive ring.
	 */
	batch_target = sc->rx_target - (req_prod - sc->rx.rsp_cons);
	for (i = mbufq_len(&sc->xn_rx_batch); i < batch_target; i++) {
		MGETHDR(m_new, M_NOWAIT, MT_DATA);
		if (m_new == NULL) {
			printf("%s: MGETHDR failed\n", __func__);
			goto no_mbuf;
		}

		m_cljget(m_new, M_NOWAIT, MJUMPAGESIZE);
		if ((m_new->m_flags & M_EXT) == 0) {
			printf("%s: m_cljget failed\n", __func__);
			m_freem(m_new);

no_mbuf:
			if (i != 0)
				goto refill;
			/*
			 * XXX set timer
			 */
			break;
		}
		m_new->m_len = m_new->m_pkthdr.len = MJUMPAGESIZE;

		/* queue the mbufs allocated */
		mbufq_tail(&sc->xn_rx_batch, m_new);
	}

	/*
	 * If we've allocated at least half of our target number of entries,
	 * submit them to the backend - we have enough to make the overhead
	 * of submission worthwhile.  Otherwise wait for more mbufs and
	 * request entries to become available.
	 */
	if (i < (sc->rx_target/2)) {
		if (req_prod > sc->rx.sring->req_prod)
			goto push;
		return;
	}

	/*
	 * Double the floating fill target if we risked having the backend
	 * run out of empty buffers for receive traffic.  We define "running
	 * low" as having less than a fourth of our target buffers free
	 * at the time we refilled the queue.
	 */
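	/*
	 * Growth is exponential: starting from RX_MIN_TARGET (32) the
	 * target doubles each time we come close to running dry, and it
	 * is clamped at rx_max_target (NET_RX_RING_SIZE by default).
	 */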
	if ((req_prod - sc->rx.sring->rsp_prod) < (sc->rx_target / 4)) {
		sc->rx_target *= 2;
		if (sc->rx_target > sc->rx_max_target)
			sc->rx_target = sc->rx_max_target;
	}

refill:
	for (nr_flips = i = 0; ; i++) {
		if ((m_new = mbufq_dequeue(&sc->xn_rx_batch)) == NULL)
			break;

		m_new->m_ext.ext_arg1 = (vm_paddr_t *)(uintptr_t)(
		    vtophys(m_new->m_ext.ext_buf) >> PAGE_SHIFT);

		id = xennet_rxidx(req_prod + i);

		KASSERT(sc->rx_mbufs[id] == NULL, ("non-NULL xm_rx_chain"));
		sc->rx_mbufs[id] = m_new;

		ref = gnttab_claim_grant_reference(&sc->gref_rx_head);
		KASSERT(ref != GNTTAB_LIST_END,
		    ("reserved grant references exhausted"));
		sc->grant_rx_ref[id] = ref;

		vaddr = mtod(m_new, vm_offset_t);
		pfn = vtophys(vaddr) >> PAGE_SHIFT;
		req = RING_GET_REQUEST(&sc->rx, req_prod + i);

		if (sc->copying_receiver == 0) {
			gnttab_grant_foreign_transfer_ref(ref,
			    otherend_id, pfn);
			sc->rx_pfn_array[nr_flips] = PFNTOMFN(pfn);
			if (!xen_feature(XENFEAT_auto_translated_physmap)) {
				/*
				 * Remove this page before passing
				 * back to Xen.
				 */
				set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
				MULTI_update_va_mapping(&sc->rx_mcl[i],
				    vaddr, 0, 0);
			}
			nr_flips++;
		} else {
			gnttab_grant_foreign_access_ref(ref,
			    otherend_id,
			    PFNTOMFN(pfn), 0);
		}
		req->id = id;
		req->gref = ref;

		sc->rx_pfn_array[i] =
		    vtomach(mtod(m_new, vm_offset_t)) >> PAGE_SHIFT;
	}

	KASSERT(i, ("no mbufs processed"));	/* should have returned earlier */
	KASSERT(mbufq_len(&sc->xn_rx_batch) == 0, ("not all mbufs processed"));
	/*
	 * We may have allocated buffers which have entries outstanding
	 * in the page update queue -- make sure we flush those first!
	 */
	PT_UPDATES_FLUSH();
	if (nr_flips != 0) {
#ifdef notyet
		/* Tell the balloon driver what is going on. */
		balloon_update_driver_allowance(i);
#endif
		set_xen_guest_handle(reservation.extent_start, sc->rx_pfn_array);
		reservation.nr_extents   = i;
		reservation.extent_order = 0;
		reservation.address_bits = 0;
		reservation.domid        = DOMID_SELF;

		if (!xen_feature(XENFEAT_auto_translated_physmap)) {
			/* After all PTEs have been zapped, flush the TLB. */
			sc->rx_mcl[i-1].args[MULTI_UVMFLAGS_INDEX] =
			    UVMF_TLB_FLUSH|UVMF_ALL;

			/* Give away a batch of pages. */
			sc->rx_mcl[i].op = __HYPERVISOR_memory_op;
			sc->rx_mcl[i].args[0] = XENMEM_decrease_reservation;
			sc->rx_mcl[i].args[1] = (u_long)&reservation;
			/* Zap PTEs and give away pages in one big multicall. */
			(void)HYPERVISOR_multicall(sc->rx_mcl, i+1);

			if (unlikely(sc->rx_mcl[i].result != i ||
			    HYPERVISOR_memory_op(XENMEM_decrease_reservation,
			    &reservation) != i))
				panic("%s: unable to reduce memory "
				    "reservation\n", __func__);
		}
	} else {
		wmb();
	}

	/* Above is a suitable barrier to ensure backend will see requests. */
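	/*
	 * Publish the new requests and only raise an event if the backend
	 * asked for one: RING_PUSH_REQUESTS_AND_CHECK_NOTIFY() compares
	 * the new producer index against the backend's advertised event
	 * index and sets 'notify' accordingly.
	 */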
	sc->rx.req_prod_pvt = req_prod + i;
push:
	RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&sc->rx, notify);
	if (notify)
		notify_remote_via_irq(sc->irq);
}

static void
xn_rxeof(struct netfront_info *np)
{
	struct ifnet *ifp;
#if __FreeBSD_version >= 700000
	struct lro_ctrl *lro = &np->xn_lro;
	struct lro_entry *queued;
#endif
	struct netfront_rx_info rinfo;
	struct netif_rx_response *rx = &rinfo.rx;
	struct netif_extra_info *extras = rinfo.extras;
	RING_IDX i, rp;
	multicall_entry_t *mcl;
	struct mbuf *m;
	struct mbuf_head rxq, errq;
	int err, pages_flipped = 0, work_to_do;

	do {
		XN_RX_LOCK_ASSERT(np);
		if (!netfront_carrier_ok(np))
			return;

		mbufq_init(&errq);
		mbufq_init(&rxq);

		ifp = np->xn_ifp;

		rp = np->rx.sring->rsp_prod;
		rmb();	/* Ensure we see queued responses up to 'rp'. */

		i = np->rx.rsp_cons;
		while ((i != rp)) {
			memcpy(rx, RING_GET_RESPONSE(&np->rx, i), sizeof(*rx));
			memset(extras, 0, sizeof(rinfo.extras));

			m = NULL;
			err = xennet_get_responses(np, &rinfo, rp, &i, &m,
			    &pages_flipped);

			if (unlikely(err)) {
				if (m)
					mbufq_tail(&errq, m);
				np->stats.rx_errors++;
				continue;
			}

			m->m_pkthdr.rcvif = ifp;
			if (rx->flags & NETRXF_data_validated) {
				/* Tell the stack the checksums are okay */
				/*
				 * XXX this isn't necessarily the case - need to add
				 * check
				 */

				m->m_pkthdr.csum_flags |=
					(CSUM_IP_CHECKED | CSUM_IP_VALID | CSUM_DATA_VALID
					    | CSUM_PSEUDO_HDR);
				m->m_pkthdr.csum_data = 0xffff;
			}

			np->stats.rx_packets++;
			np->stats.rx_bytes += m->m_pkthdr.len;

			mbufq_tail(&rxq, m);
			np->rx.rsp_cons = i;
		}

		if (pages_flipped) {
			/* Some pages are no longer absent... */
#ifdef notyet
			balloon_update_driver_allowance(-pages_flipped);
#endif
			/*
			 * Do all the remapping work, and M->P updates, in
			 * one big hypercall.
			 */
			if (!xen_feature(XENFEAT_auto_translated_physmap)) {
				mcl = np->rx_mcl + pages_flipped;
				mcl->op = __HYPERVISOR_mmu_update;
				mcl->args[0] = (u_long)np->rx_mmu;
				mcl->args[1] = pages_flipped;
				mcl->args[2] = 0;
				mcl->args[3] = DOMID_SELF;
				(void)HYPERVISOR_multicall(np->rx_mcl,
				    pages_flipped + 1);
			}
		}

		while ((m = mbufq_dequeue(&errq)))
			m_freem(m);

		/*
		 * Process all the mbufs after the remapping is complete.
		 * Break the mbuf chain first though.
		 */
		while ((m = mbufq_dequeue(&rxq)) != NULL) {
			ifp->if_ipackets++;

			/*
			 * Do we really need to drop the rx lock?
			 */
			XN_RX_UNLOCK(np);
#if __FreeBSD_version >= 700000
			/* Use LRO if possible */
			if ((ifp->if_capenable & IFCAP_LRO) == 0 ||
			    lro->lro_cnt == 0 || tcp_lro_rx(lro, m, 0)) {
				/*
				 * If LRO fails, pass up to the stack
				 * directly.
				 */
				(*ifp->if_input)(ifp, m);
			}
#else
			(*ifp->if_input)(ifp, m);
#endif
			XN_RX_LOCK(np);
		}

		np->rx.rsp_cons = i;

#if __FreeBSD_version >= 700000
		/*
		 * Flush any outstanding LRO work
		 */
		while (!SLIST_EMPTY(&lro->lro_active)) {
			queued = SLIST_FIRST(&lro->lro_active);
			SLIST_REMOVE_HEAD(&lro->lro_active, next);
			tcp_lro_flush(lro, queued);
		}
#endif

#if 0
		/* If we get a callback with very few responses, reduce fill target. */
		/* NB. Note exponential increase, linear decrease. */
		if (((np->rx.req_prod_pvt - np->rx.sring->rsp_prod) >
		    ((3*np->rx_target) / 4)) &&
		    (--np->rx_target < np->rx_min_target))
			np->rx_target = np->rx_min_target;
#endif

		network_alloc_rx_buffers(np);

		RING_FINAL_CHECK_FOR_RESPONSES(&np->rx, work_to_do);
	} while (work_to_do);
}

static void
xn_txeof(struct netfront_info *np)
{
	RING_IDX i, prod;
	unsigned short id;
	struct ifnet *ifp;
	netif_tx_response_t *txr;
	struct mbuf *m;

	XN_TX_LOCK_ASSERT(np);

	if (!netfront_carrier_ok(np))
		return;

	ifp = np->xn_ifp;

	do {
		prod = np->tx.sring->rsp_prod;
		rmb(); /* Ensure we see responses up to 'rp'. */

		for (i = np->tx.rsp_cons; i != prod; i++) {
			txr = RING_GET_RESPONSE(&np->tx, i);
			if (txr->status == NETIF_RSP_NULL)
				continue;

			if (txr->status != NETIF_RSP_OKAY) {
				printf("%s: WARNING: response is %d!\n",
				    __func__, txr->status);
			}
			id = txr->id;
			m = np->tx_mbufs[id];
			KASSERT(m != NULL, ("mbuf not found in xn_tx_chain"));
			KASSERT((uintptr_t)m > NET_TX_RING_SIZE,
				("mbuf already on the free list, but we're "
				"trying to free it again!"));
			M_ASSERTVALID(m);

			/*
			 * Increment packet count if this is the last
			 * mbuf of the chain.
			 */
			if (!m->m_next)
				ifp->if_opackets++;
			if (unlikely(gnttab_query_foreign_access(
			    np->grant_tx_ref[id]) != 0)) {
				panic("%s: grant id %u still in use by the "
				    "backend", __func__, id);
			}
			gnttab_end_foreign_access_ref(
				np->grant_tx_ref[id]);
			gnttab_release_grant_reference(
				&np->gref_tx_head, np->grant_tx_ref[id]);
			np->grant_tx_ref[id] = GRANT_REF_INVALID;

			np->tx_mbufs[id] = NULL;
			add_id_to_freelist(np->tx_mbufs, id);
			np->xn_cdata.xn_tx_chain_cnt--;
			m_free(m);
			/*
			 * Only mark the queue active if we've freed up at
			 * least one slot to try.
			 */
			ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
		}
		np->tx.rsp_cons = prod;

		/*
		 * Set a new event, then check for race with update of
		 * tx_cons.  Note that it is essential to schedule a
		 * callback, no matter how few buffers are pending.  Even if
		 * there is space in the transmit ring, higher layers may
		 * be blocked because too much data is outstanding: in such
		 * cases notification from Xen is likely to be the only kick
		 * that we'll get.
		 */
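		/*
		 * Ask for the next event roughly halfway between the
		 * responses consumed so far and the requests currently
		 * outstanding, rather than after every single response.
		 */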
		np->tx.sring->rsp_event =
		    prod + ((np->tx.sring->req_prod - prod) >> 1) + 1;

		mb();
	} while (prod != np->tx.sring->rsp_prod);

	if (np->tx_full &&
	    ((np->tx.sring->req_prod - prod) < NET_TX_RING_SIZE)) {
		np->tx_full = 0;
#if 0
		if (np->user_state == UST_OPEN)
			netif_wake_queue(dev);
#endif
	}
}

static void
xn_intr(void *xsc)
{
	struct netfront_info *np = xsc;
	struct ifnet *ifp = np->xn_ifp;

#if 0
	if (!(np->rx.rsp_cons != np->rx.sring->rsp_prod &&
	    likely(netfront_carrier_ok(np)) &&
	    ifp->if_drv_flags & IFF_DRV_RUNNING))
		return;
#endif
	if (RING_HAS_UNCONSUMED_RESPONSES(&np->tx)) {
		XN_TX_LOCK(np);
		xn_txeof(np);
		XN_TX_UNLOCK(np);
	}

	XN_RX_LOCK(np);
	xn_rxeof(np);
	XN_RX_UNLOCK(np);

	if (ifp->if_drv_flags & IFF_DRV_RUNNING &&
	    !IFQ_DRV_IS_EMPTY(&ifp->if_snd))
		xn_start(ifp);
}

static void
xennet_move_rx_slot(struct netfront_info *np, struct mbuf *m,
	grant_ref_t ref)
{
	int new = xennet_rxidx(np->rx.req_prod_pvt);

	KASSERT(np->rx_mbufs[new] == NULL, ("rx_mbufs != NULL"));
	np->rx_mbufs[new] = m;
	np->grant_rx_ref[new] = ref;
	RING_GET_REQUEST(&np->rx, np->rx.req_prod_pvt)->id = new;
	RING_GET_REQUEST(&np->rx, np->rx.req_prod_pvt)->gref = ref;
	np->rx.req_prod_pvt++;
}

static int
xennet_get_extras(struct netfront_info *np,
    struct netif_extra_info *extras, RING_IDX rp, RING_IDX *cons)
{
	struct netif_extra_info *extra;

	int err = 0;

	do {
		struct mbuf *m;
		grant_ref_t ref;

		if (unlikely(*cons + 1 == rp)) {
#if 0
			if (net_ratelimit())
				WPRINTK("Missing extra info\n");
#endif
			err = EINVAL;
			break;
		}

		extra = (struct netif_extra_info *)
		    RING_GET_RESPONSE(&np->rx, ++(*cons));

		if (unlikely(!extra->type ||
			extra->type >= XEN_NETIF_EXTRA_TYPE_MAX)) {
#if 0
			if (net_ratelimit())
				WPRINTK("Invalid extra type: %d\n",
					extra->type);
#endif
			err = EINVAL;
		} else {
			memcpy(&extras[extra->type - 1], extra, sizeof(*extra));
		}

		m = xennet_get_rx_mbuf(np, *cons);
		ref = xennet_get_rx_ref(np, *cons);
		xennet_move_rx_slot(np, m, ref);
	} while (extra->flags & XEN_NETIF_EXTRA_FLAG_MORE);

	return err;
}

static int
xennet_get_responses(struct netfront_info *np,
	struct netfront_rx_info *rinfo, RING_IDX rp, RING_IDX *cons,
	struct mbuf **list,
	int *pages_flipped_p)
{
	int pages_flipped = *pages_flipped_p;
	struct mmu_update *mmu;
	struct multicall_entry *mcl;
	struct netif_rx_response *rx = &rinfo->rx;
	struct netif_extra_info *extras = rinfo->extras;
	struct mbuf *m, *m0, *m_prev;
	grant_ref_t ref = xennet_get_rx_ref(np, *cons);
	RING_IDX ref_cons = *cons;
	int frags = 1;
	int err = 0;
	u_long ret;

	m0 = m = m_prev = xennet_get_rx_mbuf(np, *cons);

	if (rx->flags & NETRXF_extra_info) {
		err = xennet_get_extras(np, extras, rp, cons);
	}

	if (m0 != NULL) {
		m0->m_pkthdr.len = 0;
		m0->m_next = NULL;
	}

	for (;;) {
		u_long mfn;

#if 0
		DPRINTK("rx->status=%hd rx->offset=%hu frags=%u\n",
			rx->status, rx->offset, frags);
#endif
		if (unlikely(rx->status < 0 ||
		    rx->offset + rx->status > PAGE_SIZE)) {

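			/*
			 * A negative status or an extent that runs past the
			 * end of the page means the response is garbage:
			 * recycle the slot and report EINVAL to the caller.
			 */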
#if 0
			if (net_ratelimit())
				WPRINTK("rx->offset: %x, size: %u\n",
					rx->offset, rx->status);
#endif
			xennet_move_rx_slot(np, m, ref);
			if (m0 == m)
				m0 = NULL;
			m = NULL;
			err = EINVAL;
			goto next_skip_queue;
		}

		/*
		 * This definitely indicates a bug, either in this driver or in
		 * the backend driver.  In future this should flag the bad
		 * situation to the system controller to reboot the backend.
		 */
		if (ref == GRANT_REF_INVALID) {

#if 0
			if (net_ratelimit())
				WPRINTK("Bad rx response id %d.\n", rx->id);
#endif
			printf("%s: Bad rx response id %d.\n", __func__, rx->id);
			err = EINVAL;
			goto next;
		}

		if (!np->copying_receiver) {
			/*
			 * Memory pressure, insufficient buffer
			 * headroom, ...
			 */
			if (!(mfn = gnttab_end_foreign_transfer_ref(ref))) {
				WPRINTK("Unfulfilled rx req (id=%d, st=%d).\n",
					rx->id, rx->status);
				xennet_move_rx_slot(np, m, ref);
				err = ENOMEM;
				goto next;
			}

			if (!xen_feature(XENFEAT_auto_translated_physmap)) {
				/* Remap the page. */
				void *vaddr = mtod(m, void *);
				uint32_t pfn;

				mcl = np->rx_mcl + pages_flipped;
				mmu = np->rx_mmu + pages_flipped;

				MULTI_update_va_mapping(mcl, (u_long)vaddr,
				    (((vm_paddr_t)mfn) << PAGE_SHIFT) | PG_RW |
				    PG_V | PG_M | PG_A, 0);
				pfn = (uintptr_t)m->m_ext.ext_arg1;
				mmu->ptr = ((vm_paddr_t)mfn << PAGE_SHIFT) |
				    MMU_MACHPHYS_UPDATE;
				mmu->val = pfn;

				set_phys_to_machine(pfn, mfn);
			}
			pages_flipped++;
		} else {
			ret = gnttab_end_foreign_access_ref(ref);
			KASSERT(ret, ("ret != 0"));
		}

		gnttab_release_grant_reference(&np->gref_rx_head, ref);

next:
		if (m == NULL)
			break;

		m->m_len = rx->status;
		m->m_data += rx->offset;
		m0->m_pkthdr.len += rx->status;

next_skip_queue:
		if (!(rx->flags & NETRXF_more_data))
			break;

		if (*cons + frags == rp) {
			if (net_ratelimit())
				WPRINTK("Need more frags\n");
			err = ENOENT;
			printf("%s: cons %u frags %u rp %u, not enough frags\n",
			    __func__, *cons, frags, rp);
			break;
		}
		/*
		 * Note that m can be NULL, if rx->status < 0 or if
		 * rx->offset + rx->status > PAGE_SIZE above.
		 */
		m_prev = m;

		rx = RING_GET_RESPONSE(&np->rx, *cons + frags);
		m = xennet_get_rx_mbuf(np, *cons + frags);

		/*
		 * m_prev == NULL can happen if rx->status < 0 or if
		 * rx->offset + rx->status > PAGE_SIZE above.
		 */
		if (m_prev != NULL)
			m_prev->m_next = m;

		/*
		 * m0 can be NULL if rx->status < 0 or if rx->offset +
		 * rx->status > PAGE_SIZE above.
		 */
		if (m0 == NULL)
			m0 = m;
		m->m_next = NULL;
		ref = xennet_get_rx_ref(np, *cons + frags);
		ref_cons = *cons + frags;
		frags++;
	}
	*list = m0;
	*cons += frags;
	*pages_flipped_p = pages_flipped;

	return (err);
}

static void
xn_tick_locked(struct netfront_info *sc)
{
	XN_RX_LOCK_ASSERT(sc);
	callout_reset(&sc->xn_stat_ch, hz, xn_tick, sc);

	/* XXX placeholder for printing debug information */
}

static void
xn_tick(void *xsc)
{
	struct netfront_info *sc;

	sc = xsc;
	XN_RX_LOCK(sc);
	xn_tick_locked(sc);
	XN_RX_UNLOCK(sc);
}

/**
 * \brief Count the number of fragments in an mbuf chain.
 *
 * Surprisingly, there isn't an M* macro for this.
 */
static inline int
xn_count_frags(struct mbuf *m)
{
	int nfrags;

	for (nfrags = 0; m != NULL; m = m->m_next)
		nfrags++;

	return (nfrags);
}

/**
 * Given an mbuf chain, make sure we have enough room and then push
 * it onto the transmit ring.
 */
static int
xn_assemble_tx_request(struct netfront_info *sc, struct mbuf *m_head)
{
	struct ifnet *ifp;
	struct mbuf *m;
	u_int nfrags;
	netif_extra_info_t *extra;
	int otherend_id;

	ifp = sc->xn_ifp;

	/**
	 * Defragment the mbuf if necessary.
	 */
	nfrags = xn_count_frags(m_head);

	/*
	 * Check to see whether this request is longer than netback
	 * can handle, and try to defrag it.
	 */
	/**
	 * It is a bit lame, but the netback driver in Linux can't
	 * deal with nfrags > MAX_TX_REQ_FRAGS, which is a quirk of
	 * the Linux network stack.
	 */
	if (nfrags > sc->maxfrags) {
		m = m_defrag(m_head, M_NOWAIT);
		if (!m) {
			/*
			 * Defrag failed, so free the mbuf and
			 * therefore drop the packet.
			 */
			m_freem(m_head);
			return (EMSGSIZE);
		}
		m_head = m;
	}

	/* Determine how many fragments now exist */
	nfrags = xn_count_frags(m_head);

	/*
	 * Check to see whether the defragmented packet has too many
	 * segments for the Linux netback driver.
	 */
	/**
	 * The FreeBSD TCP stack, with TSO enabled, can produce a chain
	 * of mbufs longer than Linux can handle.  Make sure we don't
	 * pass a too-long chain over to the other side by dropping the
	 * packet.  It doesn't look like there is currently a way to
	 * tell the TCP stack to generate a shorter chain of packets.
	 */
	if (nfrags > MAX_TX_REQ_FRAGS) {
#ifdef DEBUG
		printf("%s: nfrags %d > MAX_TX_REQ_FRAGS %d, netback "
		    "won't be able to handle it, dropping\n",
		    __func__, nfrags, MAX_TX_REQ_FRAGS);
#endif
		m_freem(m_head);
		return (EMSGSIZE);
	}

	/*
	 * This check should be redundant.  We've already verified that we
	 * have enough slots in the ring to handle a packet of maximum
	 * size, and that our packet is less than the maximum size.  Keep
	 * it in here as an assert for now just to make certain that
	 * xn_tx_chain_cnt is accurate.
	 */
	KASSERT((sc->xn_cdata.xn_tx_chain_cnt + nfrags) <= NET_TX_RING_SIZE,
		("%s: xn_tx_chain_cnt (%d) + nfrags (%d) > NET_TX_RING_SIZE "
		 "(%d)!", __func__, (int) sc->xn_cdata.xn_tx_chain_cnt,
		    (int) nfrags, (int) NET_TX_RING_SIZE));

	/*
	 * Start packing the mbufs in this chain into
	 * the fragment pointers.  Stop when we run out
	 * of fragments or hit the end of the mbuf chain.
	 */
	m = m_head;
	extra = NULL;
	otherend_id = xenbus_get_otherend_id(sc->xbdev);
	for (m = m_head; m; m = m->m_next) {
		netif_tx_request_t *tx;
		uintptr_t id;
		grant_ref_t ref;
		u_long mfn; /* XXX Wrong type? */

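		/*
		 * For each fragment: claim a request slot and a free id,
		 * grant the backend read-only access to the page holding
		 * the data, then record the grant, offset and size in the
		 * request.
		 */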
		tx = RING_GET_REQUEST(&sc->tx, sc->tx.req_prod_pvt);
		id = get_id_from_freelist(sc->tx_mbufs);
		if (id == 0)
			panic("%s: was allocated the freelist head!\n",
			    __func__);
		sc->xn_cdata.xn_tx_chain_cnt++;
		if (sc->xn_cdata.xn_tx_chain_cnt > NET_TX_RING_SIZE)
			panic("%s: tx_chain_cnt must be <= NET_TX_RING_SIZE\n",
			    __func__);
		sc->tx_mbufs[id] = m;
		tx->id = id;
		ref = gnttab_claim_grant_reference(&sc->gref_tx_head);
		KASSERT((short)ref >= 0, ("Negative ref"));
		mfn = virt_to_mfn(mtod(m, vm_offset_t));
		gnttab_grant_foreign_access_ref(ref, otherend_id,
		    mfn, GNTMAP_readonly);
		tx->gref = sc->grant_tx_ref[id] = ref;
		tx->offset = mtod(m, vm_offset_t) & (PAGE_SIZE - 1);
		tx->flags = 0;
		if (m == m_head) {
			/*
			 * The first fragment has the entire packet
			 * size, subsequent fragments have just the
			 * fragment size.  The backend works out the
			 * true size of the first fragment by
			 * subtracting the sizes of the other
			 * fragments.
			 */
			tx->size = m->m_pkthdr.len;

			/*
			 * The first fragment contains the checksum flags
			 * and is optionally followed by extra data for
			 * TSO etc.
			 */
			/**
			 * CSUM_TSO requires checksum offloading.
			 * Some versions of FreeBSD fail to
			 * set CSUM_TCP in the CSUM_TSO case,
			 * so we have to test for CSUM_TSO
			 * explicitly.
			 */
			if (m->m_pkthdr.csum_flags
			    & (CSUM_DELAY_DATA | CSUM_TSO)) {
				tx->flags |= (NETTXF_csum_blank
				    | NETTXF_data_validated);
			}
#if __FreeBSD_version >= 700000
			if (m->m_pkthdr.csum_flags & CSUM_TSO) {
				struct netif_extra_info *gso =
					(struct netif_extra_info *)
					RING_GET_REQUEST(&sc->tx,
							 ++sc->tx.req_prod_pvt);

				tx->flags |= NETTXF_extra_info;

				gso->u.gso.size = m->m_pkthdr.tso_segsz;
				gso->u.gso.type =
					XEN_NETIF_GSO_TYPE_TCPV4;
				gso->u.gso.pad = 0;
				gso->u.gso.features = 0;

				gso->type = XEN_NETIF_EXTRA_TYPE_GSO;
				gso->flags = 0;
			}
#endif
		} else {
			tx->size = m->m_len;
		}
		if (m->m_next)
			tx->flags |= NETTXF_more_data;

		sc->tx.req_prod_pvt++;
	}
	BPF_MTAP(ifp, m_head);

	sc->stats.tx_bytes += m_head->m_pkthdr.len;
	sc->stats.tx_packets++;

	return (0);
}

static void
xn_start_locked(struct ifnet *ifp)
{
	struct netfront_info *sc;
	struct mbuf *m_head;
	int notify;

	sc = ifp->if_softc;

	if (!netfront_carrier_ok(sc))
		return;

	/*
	 * While we have enough transmit slots available for at least one
	 * maximum-sized packet, pull mbufs off the queue and put them on
	 * the transmit ring.
	 */
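	/*
	 * xn_tx_slot_available() leaves room for MAX_TX_REQ_FRAGS plus
	 * two option slots, so a worst-case TSO packet (and its
	 * extra-info descriptor) is guaranteed to fit once we commit to
	 * dequeueing it.
	 */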
	while (xn_tx_slot_available(sc)) {
		IF_DEQUEUE(&ifp->if_snd, m_head);
		if (m_head == NULL)
			break;

		if (xn_assemble_tx_request(sc, m_head) != 0)
			break;
	}

	RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&sc->tx, notify);
	if (notify)
		notify_remote_via_irq(sc->irq);

	if (RING_FULL(&sc->tx)) {
		sc->tx_full = 1;
#if 0
		netif_stop_queue(dev);
#endif
	}
}

static void
xn_start(struct ifnet *ifp)
{
	struct netfront_info *sc;
	sc = ifp->if_softc;
	XN_TX_LOCK(sc);
	xn_start_locked(ifp);
	XN_TX_UNLOCK(sc);
}

/* equivalent of network_open() in Linux */
static void
xn_ifinit_locked(struct netfront_info *sc)
{
	struct ifnet *ifp;

	XN_LOCK_ASSERT(sc);

	ifp = sc->xn_ifp;

	if (ifp->if_drv_flags & IFF_DRV_RUNNING)
		return;

	xn_stop(sc);

	network_alloc_rx_buffers(sc);
	sc->rx.sring->rsp_event = sc->rx.rsp_cons + 1;

	ifp->if_drv_flags |= IFF_DRV_RUNNING;
	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
	if_link_state_change(ifp, LINK_STATE_UP);

	callout_reset(&sc->xn_stat_ch, hz, xn_tick, sc);
}

static void
xn_ifinit(void *xsc)
{
	struct netfront_info *sc = xsc;

	XN_LOCK(sc);
	xn_ifinit_locked(sc);
	XN_UNLOCK(sc);
}

static int
xn_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
	struct netfront_info *sc = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *) data;
#ifdef INET
	struct ifaddr *ifa = (struct ifaddr *)data;
#endif

	int mask, error = 0;
	switch(cmd) {
	case SIOCSIFADDR:
	case SIOCGIFADDR:
#ifdef INET
		XN_LOCK(sc);
		if (ifa->ifa_addr->sa_family == AF_INET) {
			ifp->if_flags |= IFF_UP;
			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
				xn_ifinit_locked(sc);
			arp_ifinit(ifp, ifa);
			XN_UNLOCK(sc);
		} else {
			XN_UNLOCK(sc);
#endif
			error = ether_ioctl(ifp, cmd, data);
#ifdef INET
		}
#endif
		break;
	case SIOCSIFMTU:
		/* XXX can we alter the MTU on a VN? */
#ifdef notyet
		if (ifr->ifr_mtu > XN_JUMBO_MTU)
			error = EINVAL;
		else
#endif
		{
			ifp->if_mtu = ifr->ifr_mtu;
			ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
			xn_ifinit(sc);
		}
		break;
	case SIOCSIFFLAGS:
		XN_LOCK(sc);
		if (ifp->if_flags & IFF_UP) {
			/*
			 * If only the state of the PROMISC flag changed,
			 * then just use the 'set promisc mode' command
			 * instead of reinitializing the entire NIC.  Doing
			 * a full re-init means reloading the firmware and
			 * waiting for it to start up, which may take a
			 * second or two.
			 */
#ifdef notyet
			/* No promiscuous mode with Xen */
			if (ifp->if_drv_flags & IFF_DRV_RUNNING &&
			    ifp->if_flags & IFF_PROMISC &&
			    !(sc->xn_if_flags & IFF_PROMISC)) {
				XN_SETBIT(sc, XN_RX_MODE,
					  XN_RXMODE_RX_PROMISC);
			} else if (ifp->if_drv_flags & IFF_DRV_RUNNING &&
				   !(ifp->if_flags & IFF_PROMISC) &&
				   sc->xn_if_flags & IFF_PROMISC) {
				XN_CLRBIT(sc, XN_RX_MODE,
					  XN_RXMODE_RX_PROMISC);
			} else
#endif
				xn_ifinit_locked(sc);
		} else {
			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
				xn_stop(sc);
			}
		}
		sc->xn_if_flags = ifp->if_flags;
		XN_UNLOCK(sc);
		error = 0;
		break;
	case SIOCSIFCAP:
		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
		if (mask & IFCAP_TXCSUM) {
			if (IFCAP_TXCSUM & ifp->if_capenable) {
				ifp->if_capenable &= ~(IFCAP_TXCSUM|IFCAP_TSO4);
				ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP
				    | CSUM_IP | CSUM_TSO);
			} else {
				ifp->if_capenable |= IFCAP_TXCSUM;
				ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP
				    | CSUM_IP);
			}
		}
		if (mask & IFCAP_RXCSUM) {
			ifp->if_capenable ^= IFCAP_RXCSUM;
		}
#if __FreeBSD_version >= 700000
		if (mask & IFCAP_TSO4) {
			if (IFCAP_TSO4 & ifp->if_capenable) {
				ifp->if_capenable &= ~IFCAP_TSO4;
				ifp->if_hwassist &= ~CSUM_TSO;
			} else if (IFCAP_TXCSUM & ifp->if_capenable) {
				ifp->if_capenable |= IFCAP_TSO4;
				ifp->if_hwassist |= CSUM_TSO;
			} else {
				IPRINTK("Xen requires tx checksum offload"
				    " be enabled to use TSO\n");
				error = EINVAL;
			}
		}
		if (mask & IFCAP_LRO) {
			ifp->if_capenable ^= IFCAP_LRO;

		}
#endif
		error = 0;
		break;
	case SIOCADDMULTI:
	case SIOCDELMULTI:
#ifdef notyet
		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
			XN_LOCK(sc);
			xn_setmulti(sc);
			XN_UNLOCK(sc);
			error = 0;
		}
#endif
		/* FALLTHROUGH */
	case SIOCSIFMEDIA:
	case SIOCGIFMEDIA:
		error = ifmedia_ioctl(ifp, ifr, &sc->sc_media, cmd);
		break;
	default:
		error = ether_ioctl(ifp, cmd, data);
	}

	return (error);
}

static void
xn_stop(struct netfront_info *sc)
{
	struct ifnet *ifp;

	XN_LOCK_ASSERT(sc);

	ifp = sc->xn_ifp;

	callout_stop(&sc->xn_stat_ch);

	xn_free_rx_ring(sc);
	xn_free_tx_ring(sc);

	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
	if_link_state_change(ifp, LINK_STATE_DOWN);
}

/* START of Xenolinux helper functions adapted to FreeBSD */
int
network_connect(struct netfront_info *np)
{
	int i, requeue_idx, error;
	grant_ref_t ref;
	netif_rx_request_t *req;
	u_int feature_rx_copy, feature_rx_flip;

	error = xs_scanf(XST_NIL, xenbus_get_otherend_path(np->xbdev),
	    "feature-rx-copy", NULL, "%u", &feature_rx_copy);
	if (error)
		feature_rx_copy = 0;
	error = xs_scanf(XST_NIL, xenbus_get_otherend_path(np->xbdev),
	    "feature-rx-flip", NULL, "%u", &feature_rx_flip);
	if (error)
		feature_rx_flip = 1;

	/*
	 * Copy packets on receive path if:
	 *  (a) This was requested by user, and the backend supports it; or
	 *  (b) Flipping was requested, but this is unsupported by the backend.
	 */
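	/*
	 * With the defaults compiled in above (rx_copy on, rx_flip off)
	 * this reduces to: copy whenever the backend advertises
	 * feature-rx-copy, otherwise fall back to page flipping.
	 */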
	np->copying_receiver = ((MODPARM_rx_copy && feature_rx_copy) ||
				(MODPARM_rx_flip && !feature_rx_flip));

	/* Recovery procedure: */
	error = talk_to_backend(np->xbdev, np);
	if (error)
		return (error);

	/* Step 1: Reinitialise variables. */
	xn_query_features(np);
	xn_configure_features(np);
	netif_release_tx_bufs(np);

	/* Step 2: Rebuild the RX buffer freelist and the RX ring itself. */
	for (requeue_idx = 0, i = 0; i < NET_RX_RING_SIZE; i++) {
		struct mbuf *m;
		u_long pfn;

		if (np->rx_mbufs[i] == NULL)
			continue;

		m = np->rx_mbufs[requeue_idx] = xennet_get_rx_mbuf(np, i);
		ref = np->grant_rx_ref[requeue_idx] = xennet_get_rx_ref(np, i);

		req = RING_GET_REQUEST(&np->rx, requeue_idx);
		pfn = vtophys(mtod(m, vm_offset_t)) >> PAGE_SHIFT;

		if (!np->copying_receiver) {
			gnttab_grant_foreign_transfer_ref(ref,
			    xenbus_get_otherend_id(np->xbdev),
			    pfn);
		} else {
			gnttab_grant_foreign_access_ref(ref,
			    xenbus_get_otherend_id(np->xbdev),
			    PFNTOMFN(pfn), 0);
		}
		req->gref = ref;
		req->id   = requeue_idx;

		requeue_idx++;
	}

	np->rx.req_prod_pvt = requeue_idx;

	/*
	 * Step 3: All public and private state should now be sane.  Get
	 * ready to start sending and receiving packets and give the driver
	 * domain a kick because we've probably just requeued some
	 * packets.
	 */
	netfront_carrier_on(np);
	notify_remote_via_irq(np->irq);
	XN_TX_LOCK(np);
	xn_txeof(np);
	XN_TX_UNLOCK(np);
	network_alloc_rx_buffers(np);

	return (0);
}

static void
show_device(struct netfront_info *sc)
{
#ifdef DEBUG
	if (sc) {
		IPRINTK("<vif handle=%u %s(%s) evtchn=%u irq=%u tx=%p rx=%p>\n",
			sc->xn_ifno,
			be_state_name[sc->xn_backend_state],
			sc->xn_user_state ? "open" : "closed",
"open" : "closed", 1982 sc->xn_evtchn, 1983 sc->xn_irq, 1984 sc->xn_tx_if, 1985 sc->xn_rx_if); 1986 } else { 1987 IPRINTK("<vif NULL>\n"); 1988 } 1989 #endif 1990 } 1991 1992 static void 1993 xn_query_features(struct netfront_info *np) 1994 { 1995 int val; 1996 1997 device_printf(np->xbdev, "backend features:"); 1998 1999 if (xs_scanf(XST_NIL, xenbus_get_otherend_path(np->xbdev), 2000 "feature-sg", NULL, "%d", &val) < 0) 2001 val = 0; 2002 2003 np->maxfrags = 1; 2004 if (val) { 2005 np->maxfrags = MAX_TX_REQ_FRAGS; 2006 printf(" feature-sg"); 2007 } 2008 2009 if (xs_scanf(XST_NIL, xenbus_get_otherend_path(np->xbdev), 2010 "feature-gso-tcpv4", NULL, "%d", &val) < 0) 2011 val = 0; 2012 2013 np->xn_ifp->if_capabilities &= ~(IFCAP_TSO4|IFCAP_LRO); 2014 if (val) { 2015 np->xn_ifp->if_capabilities |= IFCAP_TSO4|IFCAP_LRO; 2016 printf(" feature-gso-tcp4"); 2017 } 2018 2019 printf("\n"); 2020 } 2021 2022 static int 2023 xn_configure_features(struct netfront_info *np) 2024 { 2025 int err; 2026 2027 err = 0; 2028 #if __FreeBSD_version >= 700000 2029 if ((np->xn_ifp->if_capenable & IFCAP_LRO) != 0) 2030 tcp_lro_free(&np->xn_lro); 2031 #endif 2032 np->xn_ifp->if_capenable = 2033 np->xn_ifp->if_capabilities & ~(IFCAP_LRO|IFCAP_TSO4); 2034 np->xn_ifp->if_hwassist &= ~CSUM_TSO; 2035 #if __FreeBSD_version >= 700000 2036 if (xn_enable_lro && (np->xn_ifp->if_capabilities & IFCAP_LRO) != 0) { 2037 err = tcp_lro_init(&np->xn_lro); 2038 if (err) { 2039 device_printf(np->xbdev, "LRO initialization failed\n"); 2040 } else { 2041 np->xn_lro.ifp = np->xn_ifp; 2042 np->xn_ifp->if_capenable |= IFCAP_LRO; 2043 } 2044 } 2045 if ((np->xn_ifp->if_capabilities & IFCAP_TSO4) != 0) { 2046 np->xn_ifp->if_capenable |= IFCAP_TSO4; 2047 np->xn_ifp->if_hwassist |= CSUM_TSO; 2048 } 2049 #endif 2050 return (err); 2051 } 2052 2053 /** Create a network device. 2054 * @param handle device handle 2055 */ 2056 int 2057 create_netdev(device_t dev) 2058 { 2059 int i; 2060 struct netfront_info *np; 2061 int err; 2062 struct ifnet *ifp; 2063 2064 np = device_get_softc(dev); 2065 2066 np->xbdev = dev; 2067 2068 XN_LOCK_INIT(np, xennetif); 2069 2070 ifmedia_init(&np->sc_media, 0, xn_ifmedia_upd, xn_ifmedia_sts); 2071 ifmedia_add(&np->sc_media, IFM_ETHER|IFM_MANUAL, 0, NULL); 2072 ifmedia_set(&np->sc_media, IFM_ETHER|IFM_MANUAL); 2073 2074 np->rx_target = RX_MIN_TARGET; 2075 np->rx_min_target = RX_MIN_TARGET; 2076 np->rx_max_target = RX_MAX_TARGET; 2077 2078 /* Initialise {tx,rx}_skbs to be a free chain containing every entry. 
	for (i = 0; i <= NET_TX_RING_SIZE; i++) {
		np->tx_mbufs[i] = (void *) ((u_long) i+1);
		np->grant_tx_ref[i] = GRANT_REF_INVALID;
	}
	np->tx_mbufs[NET_TX_RING_SIZE] = (void *)0;

	for (i = 0; i <= NET_RX_RING_SIZE; i++) {

		np->rx_mbufs[i] = NULL;
		np->grant_rx_ref[i] = GRANT_REF_INVALID;
	}
	/* A grant for every tx ring slot */
	if (gnttab_alloc_grant_references(NET_TX_RING_SIZE,
					  &np->gref_tx_head) != 0) {
		IPRINTK("#### netfront can't alloc tx grant refs\n");
		err = ENOMEM;
		goto exit;
	}
	/* A grant for every rx ring slot */
	if (gnttab_alloc_grant_references(RX_MAX_TARGET,
					  &np->gref_rx_head) != 0) {
		WPRINTK("#### netfront can't alloc rx grant refs\n");
		gnttab_free_grant_references(np->gref_tx_head);
		err = ENOMEM;
		goto exit;
	}

	err = xen_net_read_mac(dev, np->mac);
	if (err)
		goto out;

	/* Set up ifnet structure */
	ifp = np->xn_ifp = if_alloc(IFT_ETHER);
	ifp->if_softc = np;
	if_initname(ifp, "xn",  device_get_unit(dev));
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
	ifp->if_ioctl = xn_ioctl;
	ifp->if_output = ether_output;
	ifp->if_start = xn_start;
#ifdef notyet
	ifp->if_watchdog = xn_watchdog;
#endif
	ifp->if_init = xn_ifinit;
	ifp->if_snd.ifq_maxlen = NET_TX_RING_SIZE - 1;

	ifp->if_hwassist = XN_CSUM_FEATURES;
	ifp->if_capabilities = IFCAP_HWCSUM;
	ifp->if_hw_tsomax = NF_TSO_MAXBURST;

	ether_ifattach(ifp, np->mac);
	callout_init(&np->xn_stat_ch, CALLOUT_MPSAFE);
	netfront_carrier_off(np);

	return (0);

exit:
	gnttab_free_grant_references(np->gref_tx_head);
out:
	return (err);
}

/**
 * Handle the change of state of the backend to Closing.  We must delete our
 * device-layer structures now, to ensure that writes are flushed through to
 * the backend.  Once this is done, we can switch to Closed in
 * acknowledgement.
 */
#if 0
static void
netfront_closing(device_t dev)
{
#if 0
	struct netfront_info *info = dev->dev_driver_data;

	DPRINTK("netfront_closing: %s removed\n", dev->nodename);

	close_netdev(info);
#endif
	xenbus_switch_state(dev, XenbusStateClosed);
}
#endif

static int
netfront_detach(device_t dev)
{
	struct netfront_info *info = device_get_softc(dev);

	DPRINTK("%s\n", xenbus_get_node(dev));

	netif_free(info);

	return 0;
}

static void
netif_free(struct netfront_info *info)
{
	XN_LOCK(info);
	xn_stop(info);
	XN_UNLOCK(info);
	callout_drain(&info->xn_stat_ch);
	netif_disconnect_backend(info);
	if (info->xn_ifp != NULL) {
		ether_ifdetach(info->xn_ifp);
		if_free(info->xn_ifp);
		info->xn_ifp = NULL;
	}
	ifmedia_removeall(&info->sc_media);
}

static void
netif_disconnect_backend(struct netfront_info *info)
{
	XN_RX_LOCK(info);
	XN_TX_LOCK(info);
	netfront_carrier_off(info);
	XN_TX_UNLOCK(info);
	XN_RX_UNLOCK(info);

	free_ring(&info->tx_ring_ref, &info->tx.sring);
	free_ring(&info->rx_ring_ref, &info->rx.sring);

	if (info->irq)
		unbind_from_irqhandler(info->irq);

	info->irq = 0;
}

static void
free_ring(int *ref, void *ring_ptr_ref)
{
	void **ring_ptr_ptr = ring_ptr_ref;

	if (*ref != GRANT_REF_INVALID) {
		/* This API frees the associated storage. */
		gnttab_end_foreign_access(*ref, *ring_ptr_ptr);
		*ref = GRANT_REF_INVALID;
	}
	*ring_ptr_ptr = NULL;
}

static int
xn_ifmedia_upd(struct ifnet *ifp)
{
	return (0);
}

static void
xn_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr)
{
	ifmr->ifm_status = IFM_AVALID|IFM_ACTIVE;
	ifmr->ifm_active = IFM_ETHER|IFM_MANUAL;
}

/* ** Driver registration ** */
static device_method_t netfront_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		netfront_probe),
	DEVMETHOD(device_attach,	netfront_attach),
	DEVMETHOD(device_detach,	netfront_detach),
	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
	DEVMETHOD(device_suspend,	netfront_suspend),
	DEVMETHOD(device_resume,	netfront_resume),

	/* Xenbus interface */
	DEVMETHOD(xenbus_otherend_changed, netfront_backend_changed),

	DEVMETHOD_END
};

static driver_t netfront_driver = {
	"xn",
	netfront_methods,
	sizeof(struct netfront_info),
};
devclass_t netfront_devclass;

DRIVER_MODULE(xe, xenbusb_front, netfront_driver, netfront_devclass, NULL,
    NULL);