/*
 * Copyright (c) 2006, Cisco Systems, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of Cisco Systems, Inc. nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include "opt_sctp.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/sockio.h>
#include <sys/mbuf.h>
#include <sys/malloc.h>
#include <sys/kernel.h>
#include <sys/socket.h>
#include <sys/queue.h>
#include <sys/taskqueue.h>

#include <sys/module.h>
#include <sys/bus.h>
#include <sys/sysctl.h>

#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_types.h>
#include <net/ethernet.h>
#include <net/if_bridgevar.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/in_var.h>
#include <netinet/ip.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>
#ifdef SCTP
#include <netinet/sctp.h>
#include <netinet/sctp_crc32.h>
#endif

#include <vm/vm_extern.h>
#include <vm/vm_kern.h>

#include <machine/in_cksum.h>
#include <machine/xen-os.h>
#include <machine/hypervisor.h>
#include <machine/hypervisor-ifs.h>
#include <machine/xen_intr.h>
#include <machine/evtchn.h>
#include <machine/xenbus.h>
#include <machine/gnttab.h>
#include <machine/xen-public/memory.h>
#include <dev/xen/xenbus/xenbus_comms.h>


#ifdef XEN_NETBACK_DEBUG
#define DPRINTF(fmt, args...) \
    printf("netback (%s:%d): " fmt, __FUNCTION__, __LINE__, ##args)
#else
#define DPRINTF(fmt, args...) ((void)0)
#endif

#ifdef XEN_NETBACK_DEBUG_LOTS
#define DDPRINTF(fmt, args...) \
    printf("netback (%s:%d): " fmt, __FUNCTION__, __LINE__, ##args)
#define DPRINTF_MBUF(_m) print_mbuf(_m, 0)
#define DPRINTF_MBUF_LEN(_m, _len) print_mbuf(_m, _len)
#else
#define DDPRINTF(fmt, args...) ((void)0)
#define DPRINTF_MBUF(_m) ((void)0)
#define DPRINTF_MBUF_LEN(_m, _len) ((void)0)
#endif

#define WPRINTF(fmt, args...) \
    printf("netback (%s:%d): " fmt, __FUNCTION__, __LINE__, ##args)

#define ARRAY_SIZE(x) (sizeof(x)/sizeof(x[0]))
#define BUG_ON PANIC_IF

#define IFNAME(_np) (_np)->ifp->if_xname

#define NET_TX_RING_SIZE __RING_SIZE((netif_tx_sring_t *)0, PAGE_SIZE)
#define NET_RX_RING_SIZE __RING_SIZE((netif_rx_sring_t *)0, PAGE_SIZE)

struct ring_ref {
    vm_offset_t va;
    grant_handle_t handle;
    uint64_t bus_addr;
};

typedef struct netback_info {

    /* Schedule lists */
    STAILQ_ENTRY(netback_info) next_tx;
    STAILQ_ENTRY(netback_info) next_rx;
    int on_tx_sched_list;
    int on_rx_sched_list;

    struct xenbus_device *xdev;
    XenbusState frontend_state;

    domid_t domid;
    int handle;
    char *bridge;

    int rings_connected;
    struct ring_ref tx_ring_ref;
    struct ring_ref rx_ring_ref;
    netif_tx_back_ring_t tx;
    netif_rx_back_ring_t rx;
    evtchn_port_t evtchn;
    int irq;
    void *irq_cookie;

    struct ifnet *ifp;
    int ref_cnt;

    device_t ndev;
    int attached;
} netif_t;


#define MAX_PENDING_REQS 256
#define PKT_PROT_LEN 64

static struct {
    netif_tx_request_t req;
    netif_t *netif;
} pending_tx_info[MAX_PENDING_REQS];
static uint16_t pending_ring[MAX_PENDING_REQS];
typedef unsigned int PEND_RING_IDX;
#define MASK_PEND_IDX(_i) ((_i)&(MAX_PENDING_REQS-1))
static PEND_RING_IDX pending_prod, pending_cons;
#define NR_PENDING_REQS (MAX_PENDING_REQS - pending_prod + pending_cons)

static unsigned long mmap_vstart;
#define MMAP_VADDR(_req) (mmap_vstart + ((_req) * PAGE_SIZE))

/* Freed TX mbufs get batched on this ring before return to pending_ring. */
static uint16_t dealloc_ring[MAX_PENDING_REQS];
static PEND_RING_IDX dealloc_prod, dealloc_cons;

static multicall_entry_t rx_mcl[NET_RX_RING_SIZE+1];
static mmu_update_t rx_mmu[NET_RX_RING_SIZE];
static gnttab_transfer_t grant_rx_op[NET_RX_RING_SIZE];

static grant_handle_t grant_tx_handle[MAX_PENDING_REQS];
static gnttab_unmap_grant_ref_t tx_unmap_ops[MAX_PENDING_REQS];
static gnttab_map_grant_ref_t tx_map_ops[MAX_PENDING_REQS];

static struct task net_tx_task, net_rx_task;
static struct callout rx_task_callout;

static STAILQ_HEAD(netback_tx_sched_list, netback_info) tx_sched_list =
    STAILQ_HEAD_INITIALIZER(tx_sched_list);
static STAILQ_HEAD(netback_rx_sched_list, netback_info) rx_sched_list =
    STAILQ_HEAD_INITIALIZER(rx_sched_list);
static struct mtx tx_sched_list_lock;
static struct mtx rx_sched_list_lock;

static int vif_unit_maker = 0;

/* Protos */
static void netback_start(struct ifnet *ifp);
static int netback_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data);
static int vif_add_dev(struct xenbus_device *xdev);
static void disconnect_rings(netif_t *netif);

#ifdef XEN_NETBACK_DEBUG_LOTS
/* Debug code to display the contents of an mbuf */
static void
print_mbuf(struct mbuf *m, int max)
{
    int i, j = 0;
    printf("mbuf %08x len = %d", (unsigned int)m, m->m_pkthdr.len);
    for (; m; m = m->m_next) {
        unsigned char *d = m->m_data;
        for (i = 0; i < m->m_len; i++) {
            if (max && j == max)
                break;
            if ((j++ % 16) == 0)
                printf("\n%04x:", j);
            printf(" %02x", d[i]);
        }
    }
    printf("\n");
}
#endif


#define MAX_MFN_ALLOC 64
static unsigned long mfn_list[MAX_MFN_ALLOC];
static unsigned int alloc_index = 0;
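
/*
 * Return a machine frame number from a small local cache, refilling the
 * cache with a single XENMEM_increase_reservation hypercall when it runs
 * dry.  Returns 0 if no frame could be allocated.
 */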
static unsigned long
alloc_mfn(void)
{
    unsigned long mfn = 0;
    struct xen_memory_reservation reservation = {
        .extent_start = mfn_list,
        .nr_extents = MAX_MFN_ALLOC,
        .extent_order = 0,
        .domid = DOMID_SELF
    };
    if (unlikely(alloc_index == 0))
        alloc_index = HYPERVISOR_memory_op(
            XENMEM_increase_reservation, &reservation);
    if (alloc_index != 0)
        mfn = mfn_list[--alloc_index];
    return mfn;
}

static unsigned long
alloc_empty_page_range(unsigned long nr_pages)
{
    void *pages;
    int i = 0, j = 0;
    multicall_entry_t mcl[17];
    unsigned long mfn_list[16];
    struct xen_memory_reservation reservation = {
        .extent_start = mfn_list,
        .nr_extents = 0,
        .address_bits = 0,
        .extent_order = 0,
        .domid = DOMID_SELF
    };

    pages = malloc(nr_pages*PAGE_SIZE, M_DEVBUF, M_NOWAIT);
    if (pages == NULL)
        return 0;

    memset(mcl, 0, sizeof(mcl));

    while (i < nr_pages) {
        unsigned long va = (unsigned long)pages + (i++ * PAGE_SIZE);

        mcl[j].op = __HYPERVISOR_update_va_mapping;
        mcl[j].args[0] = va;

        mfn_list[j++] = vtomach(va) >> PAGE_SHIFT;

        xen_phys_machine[(vtophys(va) >> PAGE_SHIFT)] = INVALID_P2M_ENTRY;

        if (j == 16 || i == nr_pages) {
            mcl[j-1].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_LOCAL;

            reservation.nr_extents = j;

            mcl[j].op = __HYPERVISOR_memory_op;
            mcl[j].args[0] = XENMEM_decrease_reservation;
            mcl[j].args[1] = (unsigned long)&reservation;

            (void)HYPERVISOR_multicall(mcl, j+1);

            mcl[j-1].args[MULTI_UVMFLAGS_INDEX] = 0;
            j = 0;
        }
    }

    return (unsigned long)pages;
}

#ifdef XEN_NETBACK_FIXUP_CSUM
static void
fixup_checksum(struct mbuf *m)
{
    struct ether_header *eh = mtod(m, struct ether_header *);
    struct ip *ip = (struct ip *)(eh + 1);
    int iphlen = ip->ip_hl << 2;
    int iplen = ntohs(ip->ip_len);

    if ((m->m_pkthdr.csum_flags & CSUM_TCP)) {
        struct tcphdr *th = (struct tcphdr *)((caddr_t)ip + iphlen);
        th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
            htons(IPPROTO_TCP + (iplen - iphlen)));
        th->th_sum = in_cksum_skip(m, iplen + sizeof(*eh), sizeof(*eh) + iphlen);
        m->m_pkthdr.csum_flags &= ~CSUM_TCP;
#ifdef SCTP
    } else if ((m->m_pkthdr.csum_flags & CSUM_SCTP)) {
        sctp_delayed_cksum(m, iphlen);
        m->m_pkthdr.csum_flags &= ~CSUM_SCTP;
#endif
    } else {
        u_short csum;
        struct udphdr *uh = (struct udphdr *)((caddr_t)ip + iphlen);
        uh->uh_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
            htons(IPPROTO_UDP + (iplen - iphlen)));
        if ((csum = in_cksum_skip(m, iplen + sizeof(*eh), sizeof(*eh) + iphlen)) == 0)
            csum = 0xffff;
        uh->uh_sum = csum;
        m->m_pkthdr.csum_flags &= ~CSUM_UDP;
    }
}
#endif

/* Add the interface to the specified bridge */
static int
add_to_bridge(struct ifnet *ifp, char *bridge)
{
    struct ifdrv ifd;
    struct ifbreq ifb;
    struct ifnet *ifp_bridge = ifunit(bridge);

    if (!ifp_bridge)
        return ENOENT;

    bzero(&ifd, sizeof(ifd));
    bzero(&ifb, sizeof(ifb));

    strcpy(ifb.ifbr_ifsname, ifp->if_xname);
    strcpy(ifd.ifd_name, ifp->if_xname);
    ifd.ifd_cmd = BRDGADD;
    ifd.ifd_len = sizeof(ifb);
    ifd.ifd_data = &ifb;

    return bridge_ioctl_kern(ifp_bridge, SIOCSDRVSPEC, &ifd);
}
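
/*
 * Allocate a netif for the frontend described by xdev and create the
 * corresponding "vif" ifnet.  The netif starts with one reference; it is
 * released (and everything freed) through netif_put().
 */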
static int
netif_create(int handle, struct xenbus_device *xdev, char *bridge)
{
    netif_t *netif;
    struct ifnet *ifp;

    netif = (netif_t *)malloc(sizeof(*netif), M_DEVBUF, M_NOWAIT | M_ZERO);
    if (!netif)
        return ENOMEM;

    netif->ref_cnt = 1;
    netif->handle = handle;
    netif->domid = xdev->otherend_id;
    netif->xdev = xdev;
    netif->bridge = bridge;
    xdev->data = netif;

    /* Set up ifnet structure */
    ifp = netif->ifp = if_alloc(IFT_ETHER);
    if (!ifp) {
        if (bridge)
            free(bridge, M_DEVBUF);
        free(netif, M_DEVBUF);
        return ENOMEM;
    }

    ifp->if_softc = netif;
    if_initname(ifp, "vif",
        atomic_fetchadd_int(&vif_unit_maker, 1) /* ifno */ );
    ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX;
    ifp->if_output = ether_output;
    ifp->if_start = netback_start;
    ifp->if_ioctl = netback_ioctl;
    ifp->if_snd.ifq_maxlen = NET_TX_RING_SIZE - 1;

    DPRINTF("Created %s for domid=%d handle=%d\n", IFNAME(netif), netif->domid, netif->handle);

    return 0;
}

static void
netif_get(netif_t *netif)
{
    atomic_add_int(&netif->ref_cnt, 1);
}

static void
netif_put(netif_t *netif)
{
    if (atomic_fetchadd_int(&netif->ref_cnt, -1) == 1) {
        DPRINTF("%s\n", IFNAME(netif));
        disconnect_rings(netif);
        if (netif->ifp) {
            if_free(netif->ifp);
            netif->ifp = NULL;
        }
        if (netif->bridge)
            free(netif->bridge, M_DEVBUF);
        free(netif, M_DEVBUF);
    }
}

static int
netback_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
    switch (cmd) {
    case SIOCSIFFLAGS:
        DDPRINTF("%s cmd=SIOCSIFFLAGS flags=%x\n",
            IFNAME((struct netback_info *)ifp->if_softc), ((struct ifreq *)data)->ifr_flags);
        return 0;
    }

    DDPRINTF("%s cmd=%lx\n", IFNAME((struct netback_info *)ifp->if_softc), cmd);

    return ether_ioctl(ifp, cmd, data);
}

static inline void
maybe_schedule_tx_action(void)
{
    smp_mb();
    if ((NR_PENDING_REQS < (MAX_PENDING_REQS/2)) && !STAILQ_EMPTY(&tx_sched_list))
        taskqueue_enqueue(taskqueue_swi, &net_tx_task);
}

/* Removes netif from front of list and does not call netif_put() (caller must) */
static netif_t *
remove_from_tx_schedule_list(void)
{
    netif_t *netif;

    mtx_lock(&tx_sched_list_lock);

    if ((netif = STAILQ_FIRST(&tx_sched_list))) {
        STAILQ_REMOVE(&tx_sched_list, netif, netback_info, next_tx);
        STAILQ_NEXT(netif, next_tx) = NULL;
        netif->on_tx_sched_list = 0;
    }

    mtx_unlock(&tx_sched_list_lock);

    return netif;
}

/* Adds netif to end of list and calls netif_get() */
static void
add_to_tx_schedule_list_tail(netif_t *netif)
{
    if (netif->on_tx_sched_list)
        return;

    mtx_lock(&tx_sched_list_lock);
    if (!netif->on_tx_sched_list && (netif->ifp->if_drv_flags & IFF_DRV_RUNNING)) {
        netif_get(netif);
        STAILQ_INSERT_TAIL(&tx_sched_list, netif, next_tx);
        netif->on_tx_sched_list = 1;
    }
    mtx_unlock(&tx_sched_list_lock);
}

/*
 * Note on CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER:
 * If this driver is pipelining transmit requests then we can be very
 * aggressive in avoiding new-packet notifications -- the frontend only needs
 * to send a notification if there are no outstanding unreceived responses.
 * If we may be buffering transmit requests for any reason then we must be
 * rather more conservative and treat this as the final check for pending
 * work.
 */
static void
netif_schedule_tx_work(netif_t *netif)
{
    int more_to_do;

#ifdef CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER
    more_to_do = RING_HAS_UNCONSUMED_REQUESTS(&netif->tx);
#else
    RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, more_to_do);
#endif

    if (more_to_do) {
        DDPRINTF("Adding %s to tx sched list\n", IFNAME(netif));
        add_to_tx_schedule_list_tail(netif);
        maybe_schedule_tx_action();
    }
}

static struct mtx dealloc_lock;
MTX_SYSINIT(netback_dealloc, &dealloc_lock, "DEALLOC LOCK", MTX_SPIN | MTX_NOWITNESS);

static void
netif_idx_release(uint16_t pending_idx)
{
    mtx_lock_spin(&dealloc_lock);
    dealloc_ring[MASK_PEND_IDX(dealloc_prod++)] = pending_idx;
    mtx_unlock_spin(&dealloc_lock);

    taskqueue_enqueue(taskqueue_swi, &net_tx_task);
}

static void
make_tx_response(netif_t *netif, uint16_t id, int8_t st)
{
    RING_IDX i = netif->tx.rsp_prod_pvt;
    netif_tx_response_t *resp;
    int notify;

    resp = RING_GET_RESPONSE(&netif->tx, i);
    resp->id = id;
    resp->status = st;

    netif->tx.rsp_prod_pvt = ++i;
    RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->tx, notify);
    if (notify)
        notify_remote_via_irq(netif->irq);

#ifdef CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER
    if (i == netif->tx.req_cons) {
        int more_to_do;
        RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, more_to_do);
        if (more_to_do)
            add_to_tx_schedule_list_tail(netif);
    }
#endif
}

static inline void
net_tx_action_dealloc(void)
{
    gnttab_unmap_grant_ref_t *gop;
    uint16_t pending_idx;
    PEND_RING_IDX dc, dp;
    netif_t *netif;
    int ret;

    dc = dealloc_cons;
    dp = dealloc_prod;

    /*
     * Free up any grants we have finished using
     */
    gop = tx_unmap_ops;
    while (dc != dp) {
        pending_idx = dealloc_ring[MASK_PEND_IDX(dc++)];
        gop->host_addr = MMAP_VADDR(pending_idx);
        gop->dev_bus_addr = 0;
        gop->handle = grant_tx_handle[pending_idx];
        gop++;
    }
    ret = HYPERVISOR_grant_table_op(
        GNTTABOP_unmap_grant_ref, tx_unmap_ops, gop - tx_unmap_ops);
    BUG_ON(ret);

    while (dealloc_cons != dp) {
        pending_idx = dealloc_ring[MASK_PEND_IDX(dealloc_cons++)];

        netif = pending_tx_info[pending_idx].netif;

        make_tx_response(netif, pending_tx_info[pending_idx].req.id,
            NETIF_RSP_OKAY);

        pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;

        netif_put(netif);
    }
}

static void
netif_page_release(void *buf, void *args)
{
    uint16_t pending_idx = (unsigned int)args;

    DDPRINTF("pending_idx=%u\n", pending_idx);

    KASSERT(pending_idx < MAX_PENDING_REQS, ("%s: bad index %u", __func__, pending_idx));

    netif_idx_release(pending_idx);
}
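
/*
 * Transmit task: for each frontend with queued requests, map the granted
 * page carrying the packet, copy the protocol headers into a local mbuf
 * (attaching the mapped page for any remainder) and pass the result to the
 * stack via if_input().
 */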
static void
net_tx_action(void *context, int pending)
{
    struct mbuf *m;
    netif_t *netif;
    netif_tx_request_t txreq;
    uint16_t pending_idx;
    RING_IDX i;
    gnttab_map_grant_ref_t *mop;
    int ret, work_to_do;
    struct mbuf *txq = NULL, *txq_last = NULL;

    if (dealloc_cons != dealloc_prod)
        net_tx_action_dealloc();

    mop = tx_map_ops;
    while ((NR_PENDING_REQS < MAX_PENDING_REQS) && !STAILQ_EMPTY(&tx_sched_list)) {

        /* Get a netif from the list with work to do. */
        netif = remove_from_tx_schedule_list();

        DDPRINTF("Processing %s (prod=%u, cons=%u)\n",
            IFNAME(netif), netif->tx.sring->req_prod, netif->tx.req_cons);

        RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, work_to_do);
        if (!work_to_do) {
            netif_put(netif);
            continue;
        }

        i = netif->tx.req_cons;
        rmb(); /* Ensure that we see the request before we copy it. */
        memcpy(&txreq, RING_GET_REQUEST(&netif->tx, i), sizeof(txreq));

        /* If we want credit-based scheduling, could add it here - WORK */

        netif->tx.req_cons++;

        netif_schedule_tx_work(netif);

        if (unlikely(txreq.size < ETHER_HDR_LEN) ||
            unlikely(txreq.size > (ETHER_MAX_LEN-ETHER_CRC_LEN))) {
            WPRINTF("Bad packet size: %d\n", txreq.size);
            make_tx_response(netif, txreq.id, NETIF_RSP_ERROR);
            netif_put(netif);
            continue;
        }

        /* The payload must not cross a page boundary, as it must not be fragmented. */
        if (unlikely((txreq.offset + txreq.size) >= PAGE_SIZE)) {
            WPRINTF("txreq.offset: %x, size: %u, end: %u\n",
                txreq.offset, txreq.size,
                (txreq.offset & PAGE_MASK) + txreq.size);
            make_tx_response(netif, txreq.id, NETIF_RSP_ERROR);
            netif_put(netif);
            continue;
        }

        pending_idx = pending_ring[MASK_PEND_IDX(pending_cons)];

        MGETHDR(m, M_DONTWAIT, MT_DATA);
        if (!m) {
            WPRINTF("Failed to allocate mbuf\n");
            make_tx_response(netif, txreq.id, NETIF_RSP_ERROR);
            netif_put(netif);
            break;
        }
        m->m_pkthdr.rcvif = netif->ifp;

        if ((m->m_pkthdr.len = txreq.size) > PKT_PROT_LEN) {
            struct mbuf *n;
            MGET(n, M_DONTWAIT, MT_DATA);
            if (!(m->m_next = n)) {
                m_freem(m);
                WPRINTF("Failed to allocate second mbuf\n");
                make_tx_response(netif, txreq.id, NETIF_RSP_ERROR);
                netif_put(netif);
                break;
            }
            n->m_len = txreq.size - PKT_PROT_LEN;
            m->m_len = PKT_PROT_LEN;
        } else
            m->m_len = txreq.size;

        mop->host_addr = MMAP_VADDR(pending_idx);
        mop->dom = netif->domid;
        mop->ref = txreq.gref;
        mop->flags = GNTMAP_host_map | GNTMAP_readonly;
        mop++;

        memcpy(&pending_tx_info[pending_idx].req,
            &txreq, sizeof(txreq));
        pending_tx_info[pending_idx].netif = netif;
        *((uint16_t *)m->m_data) = pending_idx;

        if (txq_last)
            txq_last->m_nextpkt = m;
        else
            txq = m;
        txq_last = m;

        pending_cons++;

        if ((mop - tx_map_ops) >= ARRAY_SIZE(tx_map_ops))
            break;
    }

    if (!txq)
        return;

    ret = HYPERVISOR_grant_table_op(
        GNTTABOP_map_grant_ref, tx_map_ops, mop - tx_map_ops);
    BUG_ON(ret);

    mop = tx_map_ops;
    while ((m = txq) != NULL) {
        caddr_t data;

        txq = m->m_nextpkt;
        m->m_nextpkt = NULL;

        pending_idx = *((uint16_t *)m->m_data);
        netif = pending_tx_info[pending_idx].netif;
        memcpy(&txreq, &pending_tx_info[pending_idx].req, sizeof(txreq));

        /* Check the remap error code. */
        if (unlikely(mop->status)) {
            WPRINTF("#### netback grant fails\n");
            make_tx_response(netif, txreq.id, NETIF_RSP_ERROR);
            netif_put(netif);
            m_freem(m);
            mop++;
            pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
            continue;
        }

#if 0
        /* Can't do this in FreeBSD since vtophys() returns the pfn */
        /* of the remote domain who loaned us the machine page - DPT */
        xen_phys_machine[(vtophys(MMAP_VADDR(pending_idx)) >> PAGE_SHIFT)] =
            mop->dev_bus_addr >> PAGE_SHIFT;
#endif
        grant_tx_handle[pending_idx] = mop->handle;

        /* Set up data in the mbuf (lengths are already set) */
        data = (caddr_t)(MMAP_VADDR(pending_idx)|txreq.offset);
        bcopy(data, m->m_data, m->m_len);
        if (m->m_next) {
            struct mbuf *n = m->m_next;
            MEXTADD(n, MMAP_VADDR(pending_idx), PAGE_SIZE, netif_page_release,
                (void *)(unsigned int)pending_idx, M_RDONLY, EXT_NET_DRV);
            n->m_data = &data[PKT_PROT_LEN];
        } else {
            /* Schedule a response immediately. */
            netif_idx_release(pending_idx);
        }

        if ((txreq.flags & NETTXF_data_validated)) {
            /* Tell the stack the checksums are okay */
            m->m_pkthdr.csum_flags |=
                (CSUM_IP_CHECKED | CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
            m->m_pkthdr.csum_data = 0xffff;
        }

        /* If necessary, tell the stack to compute the checksums should it forward the packet */
        if ((txreq.flags & NETTXF_csum_blank)) {
            struct ether_header *eh = mtod(m, struct ether_header *);
            if (ntohs(eh->ether_type) == ETHERTYPE_IP) {
                struct ip *ip = (struct ip *)&m->m_data[14];
                if (ip->ip_p == IPPROTO_TCP)
                    m->m_pkthdr.csum_flags |= CSUM_TCP;
                else if (ip->ip_p == IPPROTO_UDP)
                    m->m_pkthdr.csum_flags |= CSUM_UDP;
            }
        }

        netif->ifp->if_ibytes += m->m_pkthdr.len;
        netif->ifp->if_ipackets++;

        DDPRINTF("RECV %d bytes from %s (cflags=%x)\n",
            m->m_pkthdr.len, IFNAME(netif), m->m_pkthdr.csum_flags);
        DPRINTF_MBUF_LEN(m, 128);

        (*netif->ifp->if_input)(netif->ifp, m);

        mop++;
    }
}

/* Handle interrupt from a frontend */
static void
netback_intr(void *arg)
{
    netif_t *netif = arg;
    DDPRINTF("%s\n", IFNAME(netif));
    add_to_tx_schedule_list_tail(netif);
    maybe_schedule_tx_action();
}

/* Removes netif from front of list and does not call netif_put() (caller must) */
static netif_t *
remove_from_rx_schedule_list(void)
{
    netif_t *netif;

    mtx_lock(&rx_sched_list_lock);

    if ((netif = STAILQ_FIRST(&rx_sched_list))) {
        STAILQ_REMOVE(&rx_sched_list, netif, netback_info, next_rx);
        STAILQ_NEXT(netif, next_rx) = NULL;
        netif->on_rx_sched_list = 0;
    }

    mtx_unlock(&rx_sched_list_lock);

    return netif;
}

/* Adds netif to end of list and calls netif_get() */
static void
add_to_rx_schedule_list_tail(netif_t *netif)
{
    if (netif->on_rx_sched_list)
        return;

    mtx_lock(&rx_sched_list_lock);
    if (!netif->on_rx_sched_list && (netif->ifp->if_drv_flags & IFF_DRV_RUNNING)) {
        netif_get(netif);
        STAILQ_INSERT_TAIL(&rx_sched_list, netif, next_rx);
        netif->on_rx_sched_list = 1;
    }
    mtx_unlock(&rx_sched_list_lock);
}
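
/*
 * Queue a receive response on the rx ring.  Returns nonzero if the frontend
 * needs to be notified; callers accumulate the result and send a single
 * notification per batch.
 */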
static int
make_rx_response(netif_t *netif, uint16_t id, int8_t st,
         uint16_t offset, uint16_t size, uint16_t flags)
{
    RING_IDX i = netif->rx.rsp_prod_pvt;
    netif_rx_response_t *resp;
    int notify;

    resp = RING_GET_RESPONSE(&netif->rx, i);
    resp->offset = offset;
    resp->flags = flags;
    resp->id = id;
    resp->status = (int16_t)size;
    if (st < 0)
        resp->status = (int16_t)st;

    DDPRINTF("rx resp(%d): off=%x fl=%x id=%x stat=%d\n",
        i, resp->offset, resp->flags, resp->id, resp->status);

    netif->rx.rsp_prod_pvt = ++i;
    RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->rx, notify);

    return notify;
}
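
/*
 * Receive path: for each mbuf queued on the vif's if_snd queue, allocate a
 * fresh machine page to back the local copy of the data, then hand the old
 * page over to the frontend with a GNTTABOP_transfer and queue an rx
 * response describing it.  Returns the number of packets dequeued.
 */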
static int
netif_rx(netif_t *netif)
{
    struct ifnet *ifp = netif->ifp;
    struct mbuf *m;
    multicall_entry_t *mcl;
    mmu_update_t *mmu;
    gnttab_transfer_t *gop;
    unsigned long vdata, old_mfn, new_mfn;
    struct mbuf *rxq = NULL, *rxq_last = NULL;
    int ret, notify = 0, pkts_dequeued = 0;

    DDPRINTF("%s\n", IFNAME(netif));

    mcl = rx_mcl;
    mmu = rx_mmu;
    gop = grant_rx_op;

    while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {

        /* Quit if the target domain has no receive buffers */
        if (netif->rx.req_cons == netif->rx.sring->req_prod)
            break;

        IFQ_DRV_DEQUEUE(&ifp->if_snd, m);
        if (m == NULL)
            break;

        pkts_dequeued++;

        /* Check if we need to copy the data */
        if (((m->m_flags & (M_RDONLY|M_EXT)) != M_EXT) ||
            (*m->m_ext.ref_cnt > 1) || m->m_next != NULL) {
            struct mbuf *n;

            DDPRINTF("copying mbuf (fl=%x ext=%x rc=%d n=%x)\n",
                m->m_flags,
                (m->m_flags & M_EXT) ? m->m_ext.ext_type : 0,
                (m->m_flags & M_EXT) ? *m->m_ext.ref_cnt : 0,
                (unsigned int)m->m_next);

            /* Make copy */
            MGETHDR(n, M_DONTWAIT, MT_DATA);
            if (!n)
                goto drop;

            MCLGET(n, M_DONTWAIT);
            if (!(n->m_flags & M_EXT)) {
                m_freem(n);
                goto drop;
            }

            /* Leave space at front and keep current alignment */
            n->m_data += 16 + ((unsigned int)m->m_data & 0x3);

            if (m->m_pkthdr.len > M_TRAILINGSPACE(n)) {
                WPRINTF("pkt too big %d\n", m->m_pkthdr.len);
                m_freem(n);
                goto drop;
            }
            m_copydata(m, 0, m->m_pkthdr.len, n->m_data);
            n->m_pkthdr.len = n->m_len = m->m_pkthdr.len;
            n->m_pkthdr.csum_flags = (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA);
            m_freem(m);
            m = n;
        }

        vdata = (unsigned long)m->m_data;
        old_mfn = vtomach(vdata) >> PAGE_SHIFT;

        if ((new_mfn = alloc_mfn()) == 0)
            goto drop;

#ifdef XEN_NETBACK_FIXUP_CSUM
        /* Check if we need to compute a checksum.  This happens */
        /* when bridging from one domain to another. */
        if ((m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) ||
            (m->m_pkthdr.csum_flags & CSUM_SCTP))
            fixup_checksum(m);
#endif

        xen_phys_machine[(vtophys(vdata) >> PAGE_SHIFT)] = new_mfn;

        mcl->op = __HYPERVISOR_update_va_mapping;
        mcl->args[0] = vdata;
        mcl->args[1] = (new_mfn << PAGE_SHIFT) | PG_V | PG_RW | PG_M | PG_A;
        mcl->args[2] = 0;
        mcl->args[3] = 0;
        mcl++;

        gop->mfn = old_mfn;
        gop->domid = netif->domid;
        gop->ref = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons)->gref;
        netif->rx.req_cons++;
        gop++;

        mmu->ptr = (new_mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE;
        mmu->val = vtophys(vdata) >> PAGE_SHIFT;
        mmu++;

        if (rxq_last)
            rxq_last->m_nextpkt = m;
        else
            rxq = m;
        rxq_last = m;

        DDPRINTF("XMIT %d bytes to %s\n", m->m_pkthdr.len, IFNAME(netif));
        DPRINTF_MBUF_LEN(m, 128);

        /* Filled the batch queue? */
        if ((gop - grant_rx_op) == ARRAY_SIZE(grant_rx_op))
            break;

        continue;
    drop:
        DDPRINTF("dropping pkt\n");
        ifp->if_oerrors++;
        m_freem(m);
    }

    if (mcl == rx_mcl)
        return pkts_dequeued;

    mcl->op = __HYPERVISOR_mmu_update;
    mcl->args[0] = (unsigned long)rx_mmu;
    mcl->args[1] = mmu - rx_mmu;
    mcl->args[2] = 0;
    mcl->args[3] = DOMID_SELF;
    mcl++;

    mcl[-2].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_ALL;
    ret = HYPERVISOR_multicall(rx_mcl, mcl - rx_mcl);
    BUG_ON(ret != 0);

    ret = HYPERVISOR_grant_table_op(GNTTABOP_transfer, grant_rx_op, gop - grant_rx_op);
    BUG_ON(ret != 0);

    mcl = rx_mcl;
    gop = grant_rx_op;

    while ((m = rxq) != NULL) {
        int8_t status;
        uint16_t id, flags = 0;

        rxq = m->m_nextpkt;
        m->m_nextpkt = NULL;

        /* Rederive the machine addresses. */
        new_mfn = mcl->args[1] >> PAGE_SHIFT;
        old_mfn = gop->mfn;

        ifp->if_obytes += m->m_pkthdr.len;
        ifp->if_opackets++;

        /* The update_va_mapping() must not fail. */
        BUG_ON(mcl->result != 0);

        /* Setup flags */
        if ((m->m_pkthdr.csum_flags & CSUM_DELAY_DATA))
            flags |= NETRXF_csum_blank | NETRXF_data_validated;
        else if ((m->m_pkthdr.csum_flags & CSUM_DATA_VALID))
            flags |= NETRXF_data_validated;

        /* Check the reassignment error code. */
        status = NETIF_RSP_OKAY;
        if (gop->status != 0) {
            DPRINTF("Bad status %d from grant transfer to DOM%u\n",
                gop->status, netif->domid);
            /*
             * Page no longer belongs to us unless GNTST_bad_page,
             * but that should be a fatal error anyway.
             */
            BUG_ON(gop->status == GNTST_bad_page);
            status = NETIF_RSP_ERROR;
        }
        id = RING_GET_REQUEST(&netif->rx, netif->rx.rsp_prod_pvt)->id;
        notify |= make_rx_response(netif, id, status,
            (unsigned long)m->m_data & PAGE_MASK,
            m->m_pkthdr.len, flags);

        m_freem(m);
        mcl++;
        gop++;
    }

    if (notify)
        notify_remote_via_irq(netif->irq);

    return pkts_dequeued;
}

static void
rx_task_timer(void *arg)
{
    DDPRINTF("\n");
    taskqueue_enqueue(taskqueue_swi, &net_rx_task);
}

static void
net_rx_action(void *context, int pending)
{
    netif_t *netif, *last_zero_work = NULL;

    DDPRINTF("\n");

    while ((netif = remove_from_rx_schedule_list())) {
        struct ifnet *ifp = netif->ifp;

        if (netif == last_zero_work) {
            if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
                add_to_rx_schedule_list_tail(netif);
            netif_put(netif);
            if (!STAILQ_EMPTY(&rx_sched_list))
                callout_reset(&rx_task_callout, 1, rx_task_timer, NULL);
            break;
        }

        if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
            if (netif_rx(netif))
                last_zero_work = NULL;
            else if (!last_zero_work)
                last_zero_work = netif;
            if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
                add_to_rx_schedule_list_tail(netif);
        }

        netif_put(netif);
    }
}

static void
netback_start(struct ifnet *ifp)
{
    netif_t *netif = (netif_t *)ifp->if_softc;

    DDPRINTF("%s\n", IFNAME(netif));

    add_to_rx_schedule_list_tail(netif);
    taskqueue_enqueue(taskqueue_swi, &net_rx_task);
}

/* Map a grant ref to a ring */
static int
map_ring(grant_ref_t ref, domid_t dom, struct ring_ref *ring)
{
    struct gnttab_map_grant_ref op;

    ring->va = kmem_alloc_nofault(kernel_map, PAGE_SIZE);
    if (ring->va == 0)
        return ENOMEM;

    op.host_addr = ring->va;
    op.flags = GNTMAP_host_map;
    op.ref = ref;
    op.dom = dom;
    HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1);
    if (op.status) {
        WPRINTF("grant table op err=%d\n", op.status);
        kmem_free(kernel_map, ring->va, PAGE_SIZE);
        ring->va = 0;
        return EACCES;
    }

    ring->handle = op.handle;
    ring->bus_addr = op.dev_bus_addr;

    return 0;
}

/* Unmap grant ref for a ring */
static void
unmap_ring(struct ring_ref *ring)
{
    struct gnttab_unmap_grant_ref op;

    op.host_addr = ring->va;
    op.dev_bus_addr = ring->bus_addr;
    op.handle = ring->handle;
    HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1);
    if (op.status)
        WPRINTF("grant table op err=%d\n", op.status);

    kmem_free(kernel_map, ring->va, PAGE_SIZE);
    ring->va = 0;
}

static int
connect_rings(netif_t *netif)
{
    struct xenbus_device *xdev = netif->xdev;
    netif_tx_sring_t *txs;
    netif_rx_sring_t *rxs;
    unsigned long tx_ring_ref, rx_ring_ref;
    evtchn_port_t evtchn;
    evtchn_op_t op = { .cmd = EVTCHNOP_bind_interdomain };
    int err;

    /* Grab frontend data and map its memory */
    err = xenbus_gather(NULL, xdev->otherend,
        "tx-ring-ref", "%lu", &tx_ring_ref,
        "rx-ring-ref", "%lu", &rx_ring_ref,
        "event-channel", "%u", &evtchn, NULL);
    if (err) {
        xenbus_dev_fatal(xdev, err,
            "reading %s/ring-ref and event-channel",
            xdev->otherend);
        return err;
    }

    err = map_ring(tx_ring_ref, netif->domid, &netif->tx_ring_ref);
    if (err) {
        xenbus_dev_fatal(xdev, err, "mapping tx ring");
        return err;
    }
    txs = (netif_tx_sring_t *)netif->tx_ring_ref.va;
    BACK_RING_INIT(&netif->tx, txs, PAGE_SIZE);

    err = map_ring(rx_ring_ref, netif->domid, &netif->rx_ring_ref);
    if (err) {
        unmap_ring(&netif->tx_ring_ref);
        xenbus_dev_fatal(xdev, err, "mapping rx ring");
        return err;
    }
    rxs = (netif_rx_sring_t *)netif->rx_ring_ref.va;
    BACK_RING_INIT(&netif->rx, rxs, PAGE_SIZE);

    op.u.bind_interdomain.remote_dom = netif->domid;
    op.u.bind_interdomain.remote_port = evtchn;
    err = HYPERVISOR_event_channel_op(&op);
    if (err) {
        unmap_ring(&netif->tx_ring_ref);
        unmap_ring(&netif->rx_ring_ref);
        xenbus_dev_fatal(xdev, err, "binding event channel");
        return err;
    }
    netif->evtchn = op.u.bind_interdomain.local_port;

    /* Bind the event channel to an irq handler */
    netif->irq =
        bind_evtchn_to_irqhandler(netif->evtchn, "netback",
            netback_intr, netif, INTR_TYPE_NET|INTR_MPSAFE, &netif->irq_cookie);

    netif->rings_connected = 1;

    DPRINTF("%s connected! evtchn=%d irq=%d\n",
        IFNAME(netif), netif->evtchn, netif->irq);

    return 0;
}

static void
disconnect_rings(netif_t *netif)
{
    DPRINTF("\n");

    if (netif->rings_connected) {
        unbind_from_irqhandler(netif->irq, netif->irq_cookie);
        netif->irq = 0;
        unmap_ring(&netif->tx_ring_ref);
        unmap_ring(&netif->rx_ring_ref);
        netif->rings_connected = 0;
    }
}

static void
connect(netif_t *netif)
{
    if (!netif->xdev ||
        !netif->attached ||
        netif->frontend_state != XenbusStateConnected) {
        return;
    }

    if (!connect_rings(netif)) {
        xenbus_switch_state(netif->xdev, NULL, XenbusStateConnected);

        /* Turn on interface */
        netif->ifp->if_drv_flags |= IFF_DRV_RUNNING;
        netif->ifp->if_flags |= IFF_UP;
    }
}

static int
netback_remove(struct xenbus_device *xdev)
{
    netif_t *netif = xdev->data;
    device_t ndev;

    DPRINTF("remove %s\n", xdev->nodename);

    if ((ndev = netif->ndev)) {
        netif->ndev = NULL;
        mtx_lock(&Giant);
        device_detach(ndev);
        mtx_unlock(&Giant);
    }

    xdev->data = NULL;
    netif->xdev = NULL;
    netif_put(netif);

    return 0;
}

/**
 * Entry point to this code when a new device is created.  Allocate the basic
 * structures and switch to the InitWait state; the rings are mapped and the
 * interface brought up later, once the frontend reports Connected.
 */
static int
netback_probe(struct xenbus_device *xdev, const struct xenbus_device_id *id)
{
    int err;
    long handle;
    char *bridge;

    DPRINTF("node=%s\n", xdev->nodename);

    /* Grab the handle */
    err = xenbus_scanf(NULL, xdev->nodename, "handle", "%li", &handle);
    if (err != 1) {
        xenbus_dev_fatal(xdev, err, "reading handle");
        return err;
    }

    /* Check for bridge */
    bridge = xenbus_read(NULL, xdev->nodename, "bridge", NULL);
    if (IS_ERR(bridge))
        bridge = NULL;

    err = xenbus_switch_state(xdev, NULL, XenbusStateInitWait);
    if (err) {
        xenbus_dev_fatal(xdev, err, "writing switch state");
        return err;
    }

    err = netif_create(handle, xdev, bridge);
    if (err) {
        xenbus_dev_fatal(xdev, err, "creating netif");
        return err;
    }

    err = vif_add_dev(xdev);
    if (err) {
        netif_put((netif_t *)xdev->data);
        xenbus_dev_fatal(xdev, err, "adding vif device");
        return err;
    }

    return 0;
}

/**
 * We are reconnecting to the frontend, due to a suspend/resume or a driver
 * restart.  We tear down our netif structure and recreate it, but leave the
 * device-layer structures intact so that this is transparent to the rest of
 * the kernel.
 */
static int netback_resume(struct xenbus_device *xdev)
{
    DPRINTF("node=%s\n", xdev->nodename);
    return 0;
}


/**
 * Callback received when the frontend's state changes.
 */
static void frontend_changed(struct xenbus_device *xdev,
            XenbusState frontend_state)
{
    netif_t *netif = xdev->data;

    DPRINTF("state=%d\n", frontend_state);

    netif->frontend_state = frontend_state;

    switch (frontend_state) {
    case XenbusStateInitialising:
    case XenbusStateInitialised:
        break;
    case XenbusStateConnected:
        connect(netif);
        break;
    case XenbusStateClosing:
        xenbus_switch_state(xdev, NULL, XenbusStateClosing);
        break;
    case XenbusStateClosed:
        xenbus_remove_device(xdev);
        break;
    case XenbusStateUnknown:
    case XenbusStateInitWait:
        xenbus_dev_fatal(xdev, EINVAL, "saw state %d at frontend",
            frontend_state);
        break;
    }
}

/* ** Driver registration ** */

static struct xenbus_device_id netback_ids[] = {
    { "vif" },
    { "" }
};

static struct xenbus_driver netback = {
    .name = "netback",
    .ids = netback_ids,
    .probe = netback_probe,
    .remove = netback_remove,
    .resume = netback_resume,
    .otherend_changed = frontend_changed,
};

static void
netback_init(void *unused)
{
    callout_init(&rx_task_callout, CALLOUT_MPSAFE);

    mmap_vstart = alloc_empty_page_range(MAX_PENDING_REQS);
    BUG_ON(!mmap_vstart);

    pending_cons = 0;
    for (pending_prod = 0; pending_prod < MAX_PENDING_REQS; pending_prod++)
        pending_ring[pending_prod] = pending_prod;

    TASK_INIT(&net_tx_task, 0, net_tx_action, NULL);
    TASK_INIT(&net_rx_task, 0, net_rx_action, NULL);
    mtx_init(&tx_sched_list_lock, "nb_tx_sched_lock", "netback tx sched lock", MTX_DEF);
    mtx_init(&rx_sched_list_lock, "nb_rx_sched_lock", "netback rx sched lock", MTX_DEF);

    DPRINTF("registering %s\n", netback.name);

    xenbus_register_backend(&netback);
}

SYSINIT(xnbedev, SI_SUB_PSEUDO, SI_ORDER_ANY, netback_init, NULL)

static int
vif_add_dev(struct xenbus_device *xdev)
{
    netif_t *netif = xdev->data;
    device_t nexus, ndev;
    devclass_t dc;
    int err = 0;

    mtx_lock(&Giant);

    /* We will add a vif device as a child of nexus0 (for now) */
    if (!(dc = devclass_find("nexus")) ||
        !(nexus = devclass_get_device(dc, 0))) {
        WPRINTF("could not find nexus0!\n");
        err = ENOENT;
        goto done;
    }

    /* Create a newbus device representing the vif */
    ndev = BUS_ADD_CHILD(nexus, 0, "vif", netif->ifp->if_dunit);
    if (!ndev) {
        WPRINTF("could not create newbus device %s!\n", IFNAME(netif));
        err = EFAULT;
        goto done;
    }

    netif_get(netif);
    device_set_ivars(ndev, netif);
    netif->ndev = ndev;

    device_probe_and_attach(ndev);

done:
    mtx_unlock(&Giant);

    return err;
}

enum {
    VIF_SYSCTL_DOMID,
    VIF_SYSCTL_HANDLE,
    VIF_SYSCTL_TXRING,
    VIF_SYSCTL_RXRING,
};
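
/*
 * Format the current tx or rx ring indices into a malloc'd string for the
 * debug sysctls below; the caller frees the returned buffer.
 */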
static char *
vif_sysctl_ring_info(netif_t *netif, int cmd)
{
    char *buf = malloc(256, M_DEVBUF, M_WAITOK);
    if (buf) {
        if (!netif->rings_connected)
            sprintf(buf, "rings not connected\n");
        else if (cmd == VIF_SYSCTL_TXRING) {
            netif_tx_back_ring_t *tx = &netif->tx;
            sprintf(buf, "nr_ents=%x req_cons=%x"
                " req_prod=%x req_event=%x"
                " rsp_prod=%x rsp_event=%x",
                tx->nr_ents, tx->req_cons,
                tx->sring->req_prod, tx->sring->req_event,
                tx->sring->rsp_prod, tx->sring->rsp_event);
        } else {
            netif_rx_back_ring_t *rx = &netif->rx;
            sprintf(buf, "nr_ents=%x req_cons=%x"
                " req_prod=%x req_event=%x"
                " rsp_prod=%x rsp_event=%x",
                rx->nr_ents, rx->req_cons,
                rx->sring->req_prod, rx->sring->req_event,
                rx->sring->rsp_prod, rx->sring->rsp_event);
        }
    }
    return buf;
}

static int
vif_sysctl_handler(SYSCTL_HANDLER_ARGS)
{
    device_t dev = (device_t)arg1;
    netif_t *netif = (netif_t *)device_get_ivars(dev);
    const char *value;
    char *buf = NULL;
    int err;

    switch (arg2) {
    case VIF_SYSCTL_DOMID:
        return sysctl_handle_int(oidp, NULL, netif->domid, req);
    case VIF_SYSCTL_HANDLE:
        return sysctl_handle_int(oidp, NULL, netif->handle, req);
    case VIF_SYSCTL_TXRING:
    case VIF_SYSCTL_RXRING:
        value = buf = vif_sysctl_ring_info(netif, arg2);
        break;
    default:
        return (EINVAL);
    }

    err = SYSCTL_OUT(req, value, strlen(value));
    if (buf != NULL)
        free(buf, M_DEVBUF);

    return err;
}

/* Newbus vif device driver probe */
static int
vif_probe(device_t dev)
{
    DDPRINTF("vif%d\n", device_get_unit(dev));
    return 0;
}

/* Newbus vif device driver attach */
static int
vif_attach(device_t dev)
{
    netif_t *netif = (netif_t *)device_get_ivars(dev);
    uint8_t mac[ETHER_ADDR_LEN];

    DDPRINTF("%s\n", IFNAME(netif));

    SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
        OID_AUTO, "domid", CTLTYPE_INT|CTLFLAG_RD,
        dev, VIF_SYSCTL_DOMID, vif_sysctl_handler, "I",
        "domid of frontend");
    SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
        OID_AUTO, "handle", CTLTYPE_INT|CTLFLAG_RD,
        dev, VIF_SYSCTL_HANDLE, vif_sysctl_handler, "I",
        "handle of frontend");
#ifdef XEN_NETBACK_DEBUG
    SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
        OID_AUTO, "txring", CTLTYPE_STRING | CTLFLAG_RD,
        dev, VIF_SYSCTL_TXRING, vif_sysctl_handler, "A",
        "tx ring info");
    SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
        OID_AUTO, "rxring", CTLTYPE_STRING | CTLFLAG_RD,
        dev, VIF_SYSCTL_RXRING, vif_sysctl_handler, "A",
        "rx ring info");
#endif

    memset(mac, 0xff, sizeof(mac));
    mac[0] &= ~0x01;

    ether_ifattach(netif->ifp, mac);
    netif->attached = 1;

    connect(netif);

    if (netif->bridge) {
        DPRINTF("Adding %s to bridge %s\n", IFNAME(netif), netif->bridge);
        int err = add_to_bridge(netif->ifp, netif->bridge);
        if (err) {
            WPRINTF("Error adding %s to %s; err=%d\n",
                IFNAME(netif), netif->bridge, err);
        }
    }

    return bus_generic_attach(dev);
}

/* Newbus vif device driver detach */
static int
vif_detach(device_t dev)
{
    netif_t *netif = (netif_t *)device_get_ivars(dev);
    struct ifnet *ifp = netif->ifp;

    DDPRINTF("%s\n", IFNAME(netif));

    /* Tell the stack that the interface is no longer active */
    ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);

    ether_ifdetach(ifp);

    bus_generic_detach(dev);

    netif->attached = 0;

    netif_put(netif);

    return 0;
}

static device_method_t vif_methods[] = {
    /* Device interface */
    DEVMETHOD(device_probe, vif_probe),
    DEVMETHOD(device_attach, vif_attach),
    DEVMETHOD(device_detach, vif_detach),
    DEVMETHOD(device_shutdown, bus_generic_shutdown),
    DEVMETHOD(device_suspend, bus_generic_suspend),
    DEVMETHOD(device_resume, bus_generic_resume),
    {0, 0}
};

static devclass_t vif_devclass;

static driver_t vif_driver = {
    "vif",
    vif_methods,
    0,
};

DRIVER_MODULE(vif, nexus, vif_driver, vif_devclass, 0, 0);


/*
 * Local variables:
 * mode: C
 * c-set-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: t
 * End:
 */