/*
 * Copyright (c) 2006, Cisco Systems, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of Cisco Systems, Inc. nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/sockio.h>
#include <sys/mbuf.h>
#include <sys/malloc.h>
#include <sys/kernel.h>
#include <sys/socket.h>
#include <sys/queue.h>
#include <sys/taskqueue.h>

#include <sys/module.h>
#include <sys/bus.h>
#include <sys/sysctl.h>

#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_types.h>
#include <net/ethernet.h>
#include <net/if_bridgevar.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/in_var.h>
#include <netinet/ip.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>

#include <vm/vm_extern.h>
#include <vm/vm_kern.h>

#include <machine/in_cksum.h>
#include <machine/xen-os.h>
#include <machine/hypervisor.h>
#include <machine/hypervisor-ifs.h>
#include <machine/xen_intr.h>
#include <machine/evtchn.h>
#include <machine/xenbus.h>
#include <machine/gnttab.h>
#include <machine/xen-public/memory.h>
#include <dev/xen/xenbus/xenbus_comms.h>


#ifdef XEN_NETBACK_DEBUG
#define DPRINTF(fmt, args...) \
	printf("netback (%s:%d): " fmt, __FUNCTION__, __LINE__, ##args)
#else
#define DPRINTF(fmt, args...) ((void)0)
#endif

#ifdef XEN_NETBACK_DEBUG_LOTS
#define DDPRINTF(fmt, args...) \
	printf("netback (%s:%d): " fmt, __FUNCTION__, __LINE__, ##args)
#define DPRINTF_MBUF(_m) print_mbuf(_m, 0)
#define DPRINTF_MBUF_LEN(_m, _len) print_mbuf(_m, _len)
#else
#define DDPRINTF(fmt, args...) ((void)0)
#define DPRINTF_MBUF(_m) ((void)0)
#define DPRINTF_MBUF_LEN(_m, _len) ((void)0)
#endif

#define WPRINTF(fmt, args...) \
	printf("netback (%s:%d): " fmt, __FUNCTION__, __LINE__, ##args)

#define ARRAY_SIZE(x) (sizeof(x)/sizeof(x[0]))
#define BUG_ON PANIC_IF

#define IFNAME(_np) (_np)->ifp->if_xname

#define NET_TX_RING_SIZE __RING_SIZE((netif_tx_sring_t *)0, PAGE_SIZE)
#define NET_RX_RING_SIZE __RING_SIZE((netif_rx_sring_t *)0, PAGE_SIZE)

struct ring_ref {
	vm_offset_t va;
	grant_handle_t handle;
	uint64_t bus_addr;
};

typedef struct netback_info {

	/* Schedule lists */
	STAILQ_ENTRY(netback_info) next_tx;
	STAILQ_ENTRY(netback_info) next_rx;
	int on_tx_sched_list;
	int on_rx_sched_list;

	struct xenbus_device *xdev;
	XenbusState frontend_state;

	domid_t domid;
	int handle;
	char *bridge;

	int rings_connected;
	struct ring_ref tx_ring_ref;
	struct ring_ref rx_ring_ref;
	netif_tx_back_ring_t tx;
	netif_rx_back_ring_t rx;
	evtchn_port_t evtchn;
	int irq;
	void *irq_cookie;

	struct ifnet *ifp;
	int ref_cnt;

	device_t ndev;
	int attached;
} netif_t;


#define MAX_PENDING_REQS 256
#define PKT_PROT_LEN 64

static struct {
	netif_tx_request_t req;
	netif_t *netif;
} pending_tx_info[MAX_PENDING_REQS];
static uint16_t pending_ring[MAX_PENDING_REQS];
typedef unsigned int PEND_RING_IDX;
#define MASK_PEND_IDX(_i) ((_i)&(MAX_PENDING_REQS-1))
static PEND_RING_IDX pending_prod, pending_cons;
#define NR_PENDING_REQS (MAX_PENDING_REQS - pending_prod + pending_cons)

static unsigned long mmap_vstart;
#define MMAP_VADDR(_req) (mmap_vstart + ((_req) * PAGE_SIZE))

/* Freed TX mbufs get batched on this ring before return to pending_ring. */
static uint16_t dealloc_ring[MAX_PENDING_REQS];
static PEND_RING_IDX dealloc_prod, dealloc_cons;

static multicall_entry_t rx_mcl[NET_RX_RING_SIZE+1];
static mmu_update_t rx_mmu[NET_RX_RING_SIZE];
static gnttab_transfer_t grant_rx_op[NET_RX_RING_SIZE];

static grant_handle_t grant_tx_handle[MAX_PENDING_REQS];
static gnttab_unmap_grant_ref_t tx_unmap_ops[MAX_PENDING_REQS];
static gnttab_map_grant_ref_t tx_map_ops[MAX_PENDING_REQS];

static struct task net_tx_task, net_rx_task;
static struct callout rx_task_callout;

static STAILQ_HEAD(netback_tx_sched_list, netback_info) tx_sched_list =
	STAILQ_HEAD_INITIALIZER(tx_sched_list);
static STAILQ_HEAD(netback_rx_sched_list, netback_info) rx_sched_list =
	STAILQ_HEAD_INITIALIZER(rx_sched_list);
static struct mtx tx_sched_list_lock;
static struct mtx rx_sched_list_lock;

static int vif_unit_maker = 0;

/* Protos */
static void netback_start(struct ifnet *ifp);
static int netback_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data);
static int vif_add_dev(struct xenbus_device *xdev);
static void disconnect_rings(netif_t *netif);

#ifdef XEN_NETBACK_DEBUG_LOTS
/* Debug code to display the contents of an mbuf */
static void
print_mbuf(struct mbuf *m, int max)
{
	int i, j = 0;
	printf("mbuf %08x len = %d", (unsigned int)m, m->m_pkthdr.len);
	for (; m; m = m->m_next) {
		unsigned char *d = m->m_data;
		for (i = 0; i < m->m_len; i++) {
			if (max && j == max)
				break;
			if ((j++ % 16) == 0)
				printf("\n%04x:", j);
			printf(" %02x", d[i]);
		}
	}
	printf("\n");
}
#endif


#define MAX_MFN_ALLOC 64
static unsigned long mfn_list[MAX_MFN_ALLOC];
static unsigned int alloc_index = 0;
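
/*
 * Pop one machine frame for use as a replacement RX page.  The small local
 * cache (mfn_list) is refilled in a single batched XENMEM_increase_reservation
 * hypercall when it runs empty; returns 0 if no frame could be obtained.
 */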
static unsigned long
alloc_mfn(void)
{
	unsigned long mfn = 0;
	struct xen_memory_reservation reservation = {
		.extent_start = mfn_list,
		.nr_extents = MAX_MFN_ALLOC,
		.extent_order = 0,
		.domid = DOMID_SELF
	};
	if ( unlikely(alloc_index == 0) )
		alloc_index = HYPERVISOR_memory_op(
			XENMEM_increase_reservation, &reservation);
	if ( alloc_index != 0 )
		mfn = mfn_list[--alloc_index];
	return mfn;
}

static unsigned long
alloc_empty_page_range(unsigned long nr_pages)
{
	void *pages;
	int i = 0, j = 0;
	multicall_entry_t mcl[17];
	unsigned long mfn_list[16];
	struct xen_memory_reservation reservation = {
		.extent_start = mfn_list,
		.nr_extents = 0,
		.address_bits = 0,
		.extent_order = 0,
		.domid = DOMID_SELF
	};

	pages = malloc(nr_pages*PAGE_SIZE, M_DEVBUF, M_NOWAIT);
	if (pages == NULL)
		return 0;

	memset(mcl, 0, sizeof(mcl));

	while (i < nr_pages) {
		unsigned long va = (unsigned long)pages + (i++ * PAGE_SIZE);

		mcl[j].op = __HYPERVISOR_update_va_mapping;
		mcl[j].args[0] = va;

		mfn_list[j++] = vtomach(va) >> PAGE_SHIFT;

		xen_phys_machine[(vtophys(va) >> PAGE_SHIFT)] = INVALID_P2M_ENTRY;

		if (j == 16 || i == nr_pages) {
			mcl[j-1].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_LOCAL;

			reservation.nr_extents = j;

			mcl[j].op = __HYPERVISOR_memory_op;
			mcl[j].args[0] = XENMEM_decrease_reservation;
			mcl[j].args[1] = (unsigned long)&reservation;

			(void)HYPERVISOR_multicall(mcl, j+1);

			mcl[j-1].args[MULTI_UVMFLAGS_INDEX] = 0;
			j = 0;
		}
	}

	return (unsigned long)pages;
}

#ifdef XEN_NETBACK_FIXUP_CSUM
static void
fixup_checksum(struct mbuf *m)
{
	struct ether_header *eh = mtod(m, struct ether_header *);
	struct ip *ip = (struct ip *)(eh + 1);
	int iphlen = ip->ip_hl << 2;
	int iplen = ntohs(ip->ip_len);

	if ((m->m_pkthdr.csum_flags & CSUM_TCP)) {
		struct tcphdr *th = (struct tcphdr *)((caddr_t)ip + iphlen);
		th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
			htons(IPPROTO_TCP + (iplen - iphlen)));
		th->th_sum = in_cksum_skip(m, iplen + sizeof(*eh), sizeof(*eh) + iphlen);
		m->m_pkthdr.csum_flags &= ~CSUM_TCP;
	} else {
		u_short csum;
		struct udphdr *uh = (struct udphdr *)((caddr_t)ip + iphlen);
		uh->uh_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
			htons(IPPROTO_UDP + (iplen - iphlen)));
		if ((csum = in_cksum_skip(m, iplen + sizeof(*eh), sizeof(*eh) + iphlen)) == 0)
			csum = 0xffff;
		uh->uh_sum = csum;
		m->m_pkthdr.csum_flags &= ~CSUM_UDP;
	}
}
#endif

/* Add the interface to the specified bridge */
static int
add_to_bridge(struct ifnet *ifp, char *bridge)
{
	struct ifdrv ifd;
	struct ifbreq ifb;
	struct ifnet *ifp_bridge = ifunit(bridge);

	if (!ifp_bridge)
		return ENOENT;

	bzero(&ifd, sizeof(ifd));
	bzero(&ifb, sizeof(ifb));

	strcpy(ifb.ifbr_ifsname, ifp->if_xname);
	strcpy(ifd.ifd_name, ifp->if_xname);
	ifd.ifd_cmd = BRDGADD;
	ifd.ifd_len = sizeof(ifb);
	ifd.ifd_data = &ifb;

	return bridge_ioctl_kern(ifp_bridge, SIOCSDRVSPEC, &ifd);
}
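
/*
 * Allocate and initialise a netif_t for one frontend connection.  The
 * structure starts with a single reference, records the domid, handle and
 * optional bridge name, and gets an ifnet named "vif<unit>".  The interface
 * is only ether_ifattach()ed later, when the newbus vif device attaches
 * (see vif_attach()).
 */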
static int
netif_create(int handle, struct xenbus_device *xdev, char *bridge)
{
	netif_t *netif;
	struct ifnet *ifp;

	netif = (netif_t *)malloc(sizeof(*netif), M_DEVBUF, M_NOWAIT | M_ZERO);
	if (!netif)
		return ENOMEM;

	netif->ref_cnt = 1;
	netif->handle = handle;
	netif->domid = xdev->otherend_id;
	netif->xdev = xdev;
	netif->bridge = bridge;
	xdev->data = netif;

	/* Set up ifnet structure */
	ifp = netif->ifp = if_alloc(IFT_ETHER);
	if (!ifp) {
		if (bridge)
			free(bridge, M_DEVBUF);
		free(netif, M_DEVBUF);
		return ENOMEM;
	}

	ifp->if_softc = netif;
	if_initname(ifp, "vif",
		atomic_fetchadd_int(&vif_unit_maker, 1) /* ifno */ );
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX;
	ifp->if_output = ether_output;
	ifp->if_start = netback_start;
	ifp->if_ioctl = netback_ioctl;
	ifp->if_mtu = ETHERMTU;
	ifp->if_snd.ifq_maxlen = NET_TX_RING_SIZE - 1;

	DPRINTF("Created %s for domid=%d handle=%d\n", IFNAME(netif), netif->domid, netif->handle);

	return 0;
}

static void
netif_get(netif_t *netif)
{
	atomic_add_int(&netif->ref_cnt, 1);
}

static void
netif_put(netif_t *netif)
{
	if (atomic_fetchadd_int(&netif->ref_cnt, -1) == 1) {
		DPRINTF("%s\n", IFNAME(netif));
		disconnect_rings(netif);
		if (netif->ifp) {
			if_free(netif->ifp);
			netif->ifp = NULL;
		}
		if (netif->bridge)
			free(netif->bridge, M_DEVBUF);
		free(netif, M_DEVBUF);
	}
}

static int
netback_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
	switch (cmd) {
	case SIOCSIFFLAGS:
		DDPRINTF("%s cmd=SIOCSIFFLAGS flags=%x\n",
			IFNAME((struct netback_info *)ifp->if_softc), ((struct ifreq *)data)->ifr_flags);
		return 0;
	}

	DDPRINTF("%s cmd=%lx\n", IFNAME((struct netback_info *)ifp->if_softc), cmd);

	return ether_ioctl(ifp, cmd, data);
}

static inline void
maybe_schedule_tx_action(void)
{
	smp_mb();
	if ((NR_PENDING_REQS < (MAX_PENDING_REQS/2)) && !STAILQ_EMPTY(&tx_sched_list))
		taskqueue_enqueue(taskqueue_swi, &net_tx_task);
}

/* Removes netif from front of list and does not call netif_put() (caller must) */
static netif_t *
remove_from_tx_schedule_list(void)
{
	netif_t *netif;

	mtx_lock(&tx_sched_list_lock);

	if ((netif = STAILQ_FIRST(&tx_sched_list))) {
		STAILQ_REMOVE(&tx_sched_list, netif, netback_info, next_tx);
		STAILQ_NEXT(netif, next_tx) = NULL;
		netif->on_tx_sched_list = 0;
	}

	mtx_unlock(&tx_sched_list_lock);

	return netif;
}

/* Adds netif to end of list and calls netif_get() */
static void
add_to_tx_schedule_list_tail(netif_t *netif)
{
	if (netif->on_tx_sched_list)
		return;

	mtx_lock(&tx_sched_list_lock);
	if (!netif->on_tx_sched_list && (netif->ifp->if_drv_flags & IFF_DRV_RUNNING)) {
		netif_get(netif);
		STAILQ_INSERT_TAIL(&tx_sched_list, netif, next_tx);
		netif->on_tx_sched_list = 1;
	}
	mtx_unlock(&tx_sched_list_lock);
}

/*
 * Note on CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER:
 * If this driver is pipelining transmit requests then we can be very
 * aggressive in avoiding new-packet notifications -- the frontend only needs
 * to send a notification if there are no outstanding unreceived responses.
 * If we may be buffering transmit requests for any reason then we must be
 * rather more conservative and treat this as the final check for pending work.
 */
static void
netif_schedule_tx_work(netif_t *netif)
{
	int more_to_do;

#ifdef CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER
	more_to_do = RING_HAS_UNCONSUMED_REQUESTS(&netif->tx);
#else
	RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, more_to_do);
#endif

	if (more_to_do) {
		DDPRINTF("Adding %s to tx sched list\n", IFNAME(netif));
		add_to_tx_schedule_list_tail(netif);
		maybe_schedule_tx_action();
	}
}

static struct mtx dealloc_lock;
MTX_SYSINIT(netback_dealloc, &dealloc_lock, "DEALLOC LOCK", MTX_SPIN | MTX_NOWITNESS);

static void
netif_idx_release(uint16_t pending_idx)
{
	mtx_lock_spin(&dealloc_lock);
	dealloc_ring[MASK_PEND_IDX(dealloc_prod++)] = pending_idx;
	mtx_unlock_spin(&dealloc_lock);

	taskqueue_enqueue(taskqueue_swi, &net_tx_task);
}

static void
make_tx_response(netif_t *netif,
	uint16_t id,
	int8_t st)
{
	RING_IDX i = netif->tx.rsp_prod_pvt;
	netif_tx_response_t *resp;
	int notify;

	resp = RING_GET_RESPONSE(&netif->tx, i);
	resp->id = id;
	resp->status = st;

	netif->tx.rsp_prod_pvt = ++i;
	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->tx, notify);
	if (notify)
		notify_remote_via_irq(netif->irq);

#ifdef CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER
	if (i == netif->tx.req_cons) {
		int more_to_do;
		RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, more_to_do);
		if (more_to_do)
			add_to_tx_schedule_list_tail(netif);
	}
#endif
}

inline static void
net_tx_action_dealloc(void)
{
	gnttab_unmap_grant_ref_t *gop;
	uint16_t pending_idx;
	PEND_RING_IDX dc, dp;
	netif_t *netif;
	int ret;

	dc = dealloc_cons;
	dp = dealloc_prod;

	/*
	 * Free up any grants we have finished using
	 */
	gop = tx_unmap_ops;
	while (dc != dp) {
		pending_idx = dealloc_ring[MASK_PEND_IDX(dc++)];
		gop->host_addr = MMAP_VADDR(pending_idx);
		gop->dev_bus_addr = 0;
		gop->handle = grant_tx_handle[pending_idx];
		gop++;
	}
	ret = HYPERVISOR_grant_table_op(
		GNTTABOP_unmap_grant_ref, tx_unmap_ops, gop - tx_unmap_ops);
	BUG_ON(ret);

	while (dealloc_cons != dp) {
		pending_idx = dealloc_ring[MASK_PEND_IDX(dealloc_cons++)];

		netif = pending_tx_info[pending_idx].netif;

		make_tx_response(netif, pending_tx_info[pending_idx].req.id,
			NETIF_RSP_OKAY);

		pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;

		netif_put(netif);
	}
}

static void
netif_page_release(void *buf, void *args)
{
	uint16_t pending_idx = (unsigned int)args;

	DDPRINTF("pending_idx=%u\n", pending_idx);

	KASSERT(pending_idx < MAX_PENDING_REQS, ("%s: bad index %u", __func__, pending_idx));

	netif_idx_release(pending_idx);
}
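
/*
 * Task handler that services guest transmit requests (packets leaving the
 * guest).  Completed grant mappings are released first; then requests are
 * pulled from each scheduled netif's TX ring, validated, and their granted
 * pages are mapped in one batched GNTTABOP_map_grant_ref hypercall before the
 * resulting mbufs are handed to the stack through if_input().
 */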
static void
net_tx_action(void *context, int pending)
{
	struct mbuf *m;
	netif_t *netif;
	netif_tx_request_t txreq;
	uint16_t pending_idx;
	RING_IDX i;
	gnttab_map_grant_ref_t *mop;
	int ret, work_to_do;
	struct mbuf *txq = NULL, *txq_last = NULL;

	if (dealloc_cons != dealloc_prod)
		net_tx_action_dealloc();

	mop = tx_map_ops;
	while ((NR_PENDING_REQS < MAX_PENDING_REQS) && !STAILQ_EMPTY(&tx_sched_list)) {

		/* Get a netif from the list with work to do. */
		netif = remove_from_tx_schedule_list();

		DDPRINTF("Processing %s (prod=%u, cons=%u)\n",
			IFNAME(netif), netif->tx.sring->req_prod, netif->tx.req_cons);

		RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, work_to_do);
		if (!work_to_do) {
			netif_put(netif);
			continue;
		}

		i = netif->tx.req_cons;
		rmb(); /* Ensure that we see the request before we copy it. */
		memcpy(&txreq, RING_GET_REQUEST(&netif->tx, i), sizeof(txreq));

		/* If we want credit-based scheduling, could add it here - WORK */

		netif->tx.req_cons++;

		netif_schedule_tx_work(netif);

		if (unlikely(txreq.size < ETHER_HDR_LEN) ||
			unlikely(txreq.size > (ETHER_MAX_LEN-ETHER_CRC_LEN))) {
			WPRINTF("Bad packet size: %d\n", txreq.size);
			make_tx_response(netif, txreq.id, NETIF_RSP_ERROR);
			netif_put(netif);
			continue;
		}

		/* No crossing a page as the payload mustn't fragment. */
		if (unlikely((txreq.offset + txreq.size) >= PAGE_SIZE)) {
			WPRINTF("txreq.offset: %x, size: %u, end: %u\n",
				txreq.offset, txreq.size,
				(txreq.offset & PAGE_MASK) + txreq.size);
			make_tx_response(netif, txreq.id, NETIF_RSP_ERROR);
			netif_put(netif);
			continue;
		}

		pending_idx = pending_ring[MASK_PEND_IDX(pending_cons)];

		MGETHDR(m, M_DONTWAIT, MT_DATA);
		if (!m) {
			WPRINTF("Failed to allocate mbuf\n");
			make_tx_response(netif, txreq.id, NETIF_RSP_ERROR);
			netif_put(netif);
			break;
		}
		m->m_pkthdr.rcvif = netif->ifp;

		if ((m->m_pkthdr.len = txreq.size) > PKT_PROT_LEN) {
			struct mbuf *n;
			MGET(n, M_DONTWAIT, MT_DATA);
			if (!(m->m_next = n)) {
				m_freem(m);
				WPRINTF("Failed to allocate second mbuf\n");
				make_tx_response(netif, txreq.id, NETIF_RSP_ERROR);
				netif_put(netif);
				break;
			}
			n->m_len = txreq.size - PKT_PROT_LEN;
			m->m_len = PKT_PROT_LEN;
		} else
			m->m_len = txreq.size;

		mop->host_addr = MMAP_VADDR(pending_idx);
		mop->dom = netif->domid;
		mop->ref = txreq.gref;
		mop->flags = GNTMAP_host_map | GNTMAP_readonly;
		mop++;

		memcpy(&pending_tx_info[pending_idx].req,
			&txreq, sizeof(txreq));
		pending_tx_info[pending_idx].netif = netif;
		*((uint16_t *)m->m_data) = pending_idx;

		if (txq_last)
			txq_last->m_nextpkt = m;
		else
			txq = m;
		txq_last = m;

		pending_cons++;

		if ((mop - tx_map_ops) >= ARRAY_SIZE(tx_map_ops))
			break;
	}

	if (!txq)
		return;

	ret = HYPERVISOR_grant_table_op(
		GNTTABOP_map_grant_ref, tx_map_ops, mop - tx_map_ops);
	BUG_ON(ret);

	mop = tx_map_ops;
	while ((m = txq) != NULL) {
		caddr_t data;

		txq = m->m_nextpkt;
		m->m_nextpkt = NULL;

		pending_idx = *((uint16_t *)m->m_data);
		netif = pending_tx_info[pending_idx].netif;
		memcpy(&txreq, &pending_tx_info[pending_idx].req, sizeof(txreq));

		/* Check the remap error code. */
		if (unlikely(mop->status)) {
			WPRINTF("#### netback grant fails\n");
			make_tx_response(netif, txreq.id, NETIF_RSP_ERROR);
			netif_put(netif);
			m_freem(m);
			mop++;
			pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
			continue;
		}

#if 0
		/* Can't do this in FreeBSD since vtophys() returns the pfn */
		/* of the remote domain who loaned us the machine page - DPT */
		xen_phys_machine[(vtophys(MMAP_VADDR(pending_idx)) >> PAGE_SHIFT)] =
			mop->dev_bus_addr >> PAGE_SHIFT;
#endif
		grant_tx_handle[pending_idx] = mop->handle;

		/* Setup data in mbuf (lengths are already set) */
		data = (caddr_t)(MMAP_VADDR(pending_idx)|txreq.offset);
		bcopy(data, m->m_data, m->m_len);
		if (m->m_next) {
			struct mbuf *n = m->m_next;
			MEXTADD(n, MMAP_VADDR(pending_idx), PAGE_SIZE, netif_page_release,
				(void *)(unsigned int)pending_idx, M_RDONLY, EXT_NET_DRV);
			n->m_data = &data[PKT_PROT_LEN];
		} else {
			/* Schedule a response immediately. */
			netif_idx_release(pending_idx);
		}

		if ((txreq.flags & NETTXF_data_validated)) {
			/* Tell the stack the checksums are okay */
			m->m_pkthdr.csum_flags |=
				(CSUM_IP_CHECKED | CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
			m->m_pkthdr.csum_data = 0xffff;
		}

		/* If necessary, inform stack to compute the checksums if it forwards the packet */
		if ((txreq.flags & NETTXF_csum_blank)) {
			struct ether_header *eh = mtod(m, struct ether_header *);
			if (ntohs(eh->ether_type) == ETHERTYPE_IP) {
				struct ip *ip = (struct ip *)&m->m_data[14];
				if (ip->ip_p == IPPROTO_TCP)
					m->m_pkthdr.csum_flags |= CSUM_TCP;
				else if (ip->ip_p == IPPROTO_UDP)
					m->m_pkthdr.csum_flags |= CSUM_UDP;
			}
		}

		netif->ifp->if_ibytes += m->m_pkthdr.len;
		netif->ifp->if_ipackets++;

		DDPRINTF("RECV %d bytes from %s (cflags=%x)\n",
			m->m_pkthdr.len, IFNAME(netif), m->m_pkthdr.csum_flags);
		DPRINTF_MBUF_LEN(m, 128);

		(*netif->ifp->if_input)(netif->ifp, m);

		mop++;
	}
}

/* Handle interrupt from a frontend */
static void
netback_intr(void *arg)
{
	netif_t *netif = arg;
	DDPRINTF("%s\n", IFNAME(netif));
	add_to_tx_schedule_list_tail(netif);
	maybe_schedule_tx_action();
}

/* Removes netif from front of list and does not call netif_put() (caller must) */
static netif_t *
remove_from_rx_schedule_list(void)
{
	netif_t *netif;

	mtx_lock(&rx_sched_list_lock);

	if ((netif = STAILQ_FIRST(&rx_sched_list))) {
		STAILQ_REMOVE(&rx_sched_list, netif, netback_info, next_rx);
		STAILQ_NEXT(netif, next_rx) = NULL;
		netif->on_rx_sched_list = 0;
	}

	mtx_unlock(&rx_sched_list_lock);

	return netif;
}

/* Adds netif to end of list and calls netif_get() */
static void
add_to_rx_schedule_list_tail(netif_t *netif)
{
	if (netif->on_rx_sched_list)
		return;

	mtx_lock(&rx_sched_list_lock);
	if (!netif->on_rx_sched_list && (netif->ifp->if_drv_flags & IFF_DRV_RUNNING)) {
		netif_get(netif);
		STAILQ_INSERT_TAIL(&rx_sched_list, netif, next_rx);
		netif->on_rx_sched_list = 1;
	}
	mtx_unlock(&rx_sched_list_lock);
}
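
/*
 * Queue one response on the guest RX ring and advance the private producer.
 * Returns nonzero when RING_PUSH_RESPONSES_AND_CHECK_NOTIFY indicates the
 * frontend needs an event; callers accumulate this and notify once per batch.
 */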
static int
make_rx_response(netif_t *netif, uint16_t id, int8_t st,
	uint16_t offset, uint16_t size, uint16_t flags)
{
	RING_IDX i = netif->rx.rsp_prod_pvt;
	netif_rx_response_t *resp;
	int notify;

	resp = RING_GET_RESPONSE(&netif->rx, i);
	resp->offset = offset;
	resp->flags = flags;
	resp->id = id;
	resp->status = (int16_t)size;
	if (st < 0)
		resp->status = (int16_t)st;

	DDPRINTF("rx resp(%d): off=%x fl=%x id=%x stat=%d\n",
		i, resp->offset, resp->flags, resp->id, resp->status);

	netif->rx.rsp_prod_pvt = ++i;
	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->rx, notify);

	return notify;
}
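
/*
 * Push queued packets from this vif's send queue toward the frontend.  Each
 * mbuf gets a freshly allocated machine frame; the local page is remapped to
 * the new frame with batched update_va_mapping/mmu_update multicalls, and the
 * old frame carrying the packet data is handed to the guest via
 * GNTTABOP_transfer.  Returns the number of packets dequeued.
 */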
static int
netif_rx(netif_t *netif)
{
	struct ifnet *ifp = netif->ifp;
	struct mbuf *m;
	multicall_entry_t *mcl;
	mmu_update_t *mmu;
	gnttab_transfer_t *gop;
	unsigned long vdata, old_mfn, new_mfn;
	struct mbuf *rxq = NULL, *rxq_last = NULL;
	int ret, notify = 0, pkts_dequeued = 0;

	DDPRINTF("%s\n", IFNAME(netif));

	mcl = rx_mcl;
	mmu = rx_mmu;
	gop = grant_rx_op;

	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {

		/* Quit if the target domain has no receive buffers */
		if (netif->rx.req_cons == netif->rx.sring->req_prod)
			break;

		IFQ_DRV_DEQUEUE(&ifp->if_snd, m);
		if (m == NULL)
			break;

		pkts_dequeued++;

		/* Check if we need to copy the data */
		if (((m->m_flags & (M_RDONLY|M_EXT)) != M_EXT) ||
			(*m->m_ext.ref_cnt > 1) || m->m_next != NULL) {
			struct mbuf *n;

			DDPRINTF("copying mbuf (fl=%x ext=%x rc=%d n=%x)\n",
				m->m_flags,
				(m->m_flags & M_EXT) ? m->m_ext.ext_type : 0,
				(m->m_flags & M_EXT) ? *m->m_ext.ref_cnt : 0,
				(unsigned int)m->m_next);

			/* Make copy */
			MGETHDR(n, M_DONTWAIT, MT_DATA);
			if (!n)
				goto drop;

			MCLGET(n, M_DONTWAIT);
			if (!(n->m_flags & M_EXT)) {
				m_freem(n);
				goto drop;
			}

			/* Leave space at front and keep current alignment */
			n->m_data += 16 + ((unsigned int)m->m_data & 0x3);

			if (m->m_pkthdr.len > M_TRAILINGSPACE(n)) {
				WPRINTF("pkt too big %d\n", m->m_pkthdr.len);
				m_freem(n);
				goto drop;
			}
			m_copydata(m, 0, m->m_pkthdr.len, n->m_data);
			n->m_pkthdr.len = n->m_len = m->m_pkthdr.len;
			n->m_pkthdr.csum_flags = (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA);
			m_freem(m);
			m = n;
		}

		vdata = (unsigned long)m->m_data;
		old_mfn = vtomach(vdata) >> PAGE_SHIFT;

		if ((new_mfn = alloc_mfn()) == 0)
			goto drop;

#ifdef XEN_NETBACK_FIXUP_CSUM
		/* Check if we need to compute a checksum.  This happens */
		/* when bridging from one domain to another. */
		if ((m->m_pkthdr.csum_flags & CSUM_DELAY_DATA))
			fixup_checksum(m);
#endif

		xen_phys_machine[(vtophys(vdata) >> PAGE_SHIFT)] = new_mfn;

		mcl->op = __HYPERVISOR_update_va_mapping;
		mcl->args[0] = vdata;
		mcl->args[1] = (new_mfn << PAGE_SHIFT) | PG_V | PG_RW | PG_M | PG_A;
		mcl->args[2] = 0;
		mcl->args[3] = 0;
		mcl++;

		gop->mfn = old_mfn;
		gop->domid = netif->domid;
		gop->ref = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons)->gref;
		netif->rx.req_cons++;
		gop++;

		mmu->ptr = (new_mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE;
		mmu->val = vtophys(vdata) >> PAGE_SHIFT;
		mmu++;

		if (rxq_last)
			rxq_last->m_nextpkt = m;
		else
			rxq = m;
		rxq_last = m;

		DDPRINTF("XMIT %d bytes to %s\n", m->m_pkthdr.len, IFNAME(netif));
		DPRINTF_MBUF_LEN(m, 128);

		/* Filled the batch queue? */
		if ((gop - grant_rx_op) == ARRAY_SIZE(grant_rx_op))
			break;

		continue;
	drop:
		DDPRINTF("dropping pkt\n");
		ifp->if_oerrors++;
		m_freem(m);
	}

	if (mcl == rx_mcl)
		return pkts_dequeued;

	mcl->op = __HYPERVISOR_mmu_update;
	mcl->args[0] = (unsigned long)rx_mmu;
	mcl->args[1] = mmu - rx_mmu;
	mcl->args[2] = 0;
	mcl->args[3] = DOMID_SELF;
	mcl++;

	mcl[-2].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_ALL;
	ret = HYPERVISOR_multicall(rx_mcl, mcl - rx_mcl);
	BUG_ON(ret != 0);

	ret = HYPERVISOR_grant_table_op(GNTTABOP_transfer, grant_rx_op, gop - grant_rx_op);
	BUG_ON(ret != 0);

	mcl = rx_mcl;
	gop = grant_rx_op;

	while ((m = rxq) != NULL) {
		int8_t status;
		uint16_t id, flags = 0;

		rxq = m->m_nextpkt;
		m->m_nextpkt = NULL;

		/* Rederive the machine addresses. */
		new_mfn = mcl->args[1] >> PAGE_SHIFT;
		old_mfn = gop->mfn;

		ifp->if_obytes += m->m_pkthdr.len;
		ifp->if_opackets++;

		/* The update_va_mapping() must not fail. */
		BUG_ON(mcl->result != 0);

		/* Setup flags */
		if ((m->m_pkthdr.csum_flags & CSUM_DELAY_DATA))
			flags |= NETRXF_csum_blank | NETRXF_data_validated;
		else if ((m->m_pkthdr.csum_flags & CSUM_DATA_VALID))
			flags |= NETRXF_data_validated;

		/* Check the reassignment error code. */
		status = NETIF_RSP_OKAY;
		if (gop->status != 0) {
			DPRINTF("Bad status %d from grant transfer to DOM%u\n",
				gop->status, netif->domid);
			/*
			 * Page no longer belongs to us unless GNTST_bad_page,
			 * but that should be a fatal error anyway.
			 */
			BUG_ON(gop->status == GNTST_bad_page);
			status = NETIF_RSP_ERROR;
		}
		id = RING_GET_REQUEST(&netif->rx, netif->rx.rsp_prod_pvt)->id;
		notify |= make_rx_response(netif, id, status,
			(unsigned long)m->m_data & PAGE_MASK,
			m->m_pkthdr.len, flags);

		m_freem(m);
		mcl++;
		gop++;
	}

	if (notify)
		notify_remote_via_irq(netif->irq);

	return pkts_dequeued;
}

static void
rx_task_timer(void *arg)
{
	DDPRINTF("\n");
	taskqueue_enqueue(taskqueue_swi, &net_rx_task);
}

static void
net_rx_action(void *context, int pending)
{
	netif_t *netif, *last_zero_work = NULL;

	DDPRINTF("\n");

	while ((netif = remove_from_rx_schedule_list())) {
		struct ifnet *ifp = netif->ifp;

		if (netif == last_zero_work) {
			if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
				add_to_rx_schedule_list_tail(netif);
			netif_put(netif);
			if (!STAILQ_EMPTY(&rx_sched_list))
				callout_reset(&rx_task_callout, 1, rx_task_timer, NULL);
			break;
		}

		if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
			if (netif_rx(netif))
				last_zero_work = NULL;
			else if (!last_zero_work)
				last_zero_work = netif;
			if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
				add_to_rx_schedule_list_tail(netif);
		}

		netif_put(netif);
	}
}

static void
netback_start(struct ifnet *ifp)
{
	netif_t *netif = (netif_t *)ifp->if_softc;

	DDPRINTF("%s\n", IFNAME(netif));

	add_to_rx_schedule_list_tail(netif);
	taskqueue_enqueue(taskqueue_swi, &net_rx_task);
}

/* Map a grant ref to a ring */
static int
map_ring(grant_ref_t ref, domid_t dom, struct ring_ref *ring)
{
	struct gnttab_map_grant_ref op;

	ring->va = kmem_alloc_nofault(kernel_map, PAGE_SIZE);
	if (ring->va == 0)
		return ENOMEM;

	op.host_addr = ring->va;
	op.flags = GNTMAP_host_map;
	op.ref = ref;
	op.dom = dom;
	HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1);
	if (op.status) {
		WPRINTF("grant table op err=%d\n", op.status);
		kmem_free(kernel_map, ring->va, PAGE_SIZE);
		ring->va = 0;
		return EACCES;
	}

	ring->handle = op.handle;
	ring->bus_addr = op.dev_bus_addr;

	return 0;
}

/* Unmap grant ref for a ring */
static void
unmap_ring(struct ring_ref *ring)
{
	struct gnttab_unmap_grant_ref op;

	op.host_addr = ring->va;
	op.dev_bus_addr = ring->bus_addr;
	op.handle = ring->handle;
	HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1);
	if (op.status)
		WPRINTF("grant table op err=%d\n", op.status);

	kmem_free(kernel_map, ring->va, PAGE_SIZE);
	ring->va = 0;
}

static int
connect_rings(netif_t *netif)
{
	struct xenbus_device *xdev = netif->xdev;
	netif_tx_sring_t *txs;
	netif_rx_sring_t *rxs;
	unsigned long tx_ring_ref, rx_ring_ref;
	evtchn_port_t evtchn;
	evtchn_op_t op = { .cmd = EVTCHNOP_bind_interdomain };
	int err;

	// Grab the frontend's ring references and map its memory
	err = xenbus_gather(NULL, xdev->otherend,
		"tx-ring-ref", "%lu", &tx_ring_ref,
		"rx-ring-ref", "%lu", &rx_ring_ref,
		"event-channel", "%u", &evtchn, NULL);
	if (err) {
		xenbus_dev_fatal(xdev, err,
			"reading %s/ring-ref and event-channel",
			xdev->otherend);
		return err;
	}

	err = map_ring(tx_ring_ref, netif->domid, &netif->tx_ring_ref);
	if (err) {
		xenbus_dev_fatal(xdev, err, "mapping tx ring");
		return err;
	}
	txs = (netif_tx_sring_t *)netif->tx_ring_ref.va;
	BACK_RING_INIT(&netif->tx, txs, PAGE_SIZE);

	err = map_ring(rx_ring_ref, netif->domid, &netif->rx_ring_ref);
	if (err) {
		unmap_ring(&netif->tx_ring_ref);
		xenbus_dev_fatal(xdev, err, "mapping rx ring");
		return err;
	}
	rxs = (netif_rx_sring_t *)netif->rx_ring_ref.va;
	BACK_RING_INIT(&netif->rx, rxs, PAGE_SIZE);

	op.u.bind_interdomain.remote_dom = netif->domid;
	op.u.bind_interdomain.remote_port = evtchn;
	err = HYPERVISOR_event_channel_op(&op);
	if (err) {
		unmap_ring(&netif->tx_ring_ref);
		unmap_ring(&netif->rx_ring_ref);
		xenbus_dev_fatal(xdev, err, "binding event channel");
		return err;
	}
	netif->evtchn = op.u.bind_interdomain.local_port;

	/* bind evtchn to irq handler */
	netif->irq =
		bind_evtchn_to_irqhandler(netif->evtchn, "netback",
			netback_intr, netif, INTR_TYPE_NET|INTR_MPSAFE, &netif->irq_cookie);

	netif->rings_connected = 1;

	DPRINTF("%s connected! evtchn=%d irq=%d\n",
		IFNAME(netif), netif->evtchn, netif->irq);

	return 0;
}

static void
disconnect_rings(netif_t *netif)
{
	DPRINTF("\n");

	if (netif->rings_connected) {
		unbind_from_irqhandler(netif->irq, netif->irq_cookie);
		netif->irq = 0;
		unmap_ring(&netif->tx_ring_ref);
		unmap_ring(&netif->rx_ring_ref);
		netif->rings_connected = 0;
	}
}

static void
connect(netif_t *netif)
{
	if (!netif->xdev ||
		!netif->attached ||
		netif->frontend_state != XenbusStateConnected) {
		return;
	}

	if (!connect_rings(netif)) {
		xenbus_switch_state(netif->xdev, NULL, XenbusStateConnected);

		/* Turn on interface */
		netif->ifp->if_drv_flags |= IFF_DRV_RUNNING;
		netif->ifp->if_flags |= IFF_UP;
	}
}

static int
netback_remove(struct xenbus_device *xdev)
{
	netif_t *netif = xdev->data;
	device_t ndev;

	DPRINTF("remove %s\n", xdev->nodename);

	if ((ndev = netif->ndev)) {
		netif->ndev = NULL;
		mtx_lock(&Giant);
		device_detach(ndev);
		mtx_unlock(&Giant);
	}

	xdev->data = NULL;
	netif->xdev = NULL;
	netif_put(netif);

	return 0;
}

/**
 * Entry point to this code when a new device is created.  Allocate the basic
 * structures, create the vif newbus device, and switch to the InitWait state;
 * the rings are mapped later, once the frontend reports Connected.
 */
static int
netback_probe(struct xenbus_device *xdev, const struct xenbus_device_id *id)
{
	int err;
	long handle;
	char *bridge;

	DPRINTF("node=%s\n", xdev->nodename);

	/* Grab the handle */
	err = xenbus_scanf(NULL, xdev->nodename, "handle", "%li", &handle);
	if (err != 1) {
		xenbus_dev_fatal(xdev, err, "reading handle");
		return err;
	}

	/* Check for bridge */
	bridge = xenbus_read(NULL, xdev->nodename, "bridge", NULL);
	if (IS_ERR(bridge))
		bridge = NULL;

	err = xenbus_switch_state(xdev, NULL, XenbusStateInitWait);
	if (err) {
		xenbus_dev_fatal(xdev, err, "writing switch state");
		return err;
	}

	err = netif_create(handle, xdev, bridge);
	if (err) {
		xenbus_dev_fatal(xdev, err, "creating netif");
		return err;
	}

	err = vif_add_dev(xdev);
	if (err) {
		netif_put((netif_t *)xdev->data);
		xenbus_dev_fatal(xdev, err, "adding vif device");
		return err;
	}

	return 0;
}

/**
 * We are reconnecting to the frontend, due to a suspend/resume or a driver
 * restart.  We tear down our netif structure and recreate it, but leave the
 * device-layer structures intact so that this is transparent to the rest of
 * the kernel.
 */
static int netback_resume(struct xenbus_device *xdev)
{
	DPRINTF("node=%s\n", xdev->nodename);
	return 0;
}


/**
 * Callback received when the frontend's state changes.
 */
static void frontend_changed(struct xenbus_device *xdev,
	XenbusState frontend_state)
{
	netif_t *netif = xdev->data;

	DPRINTF("state=%d\n", frontend_state);

	netif->frontend_state = frontend_state;

	switch (frontend_state) {
	case XenbusStateInitialising:
	case XenbusStateInitialised:
		break;
	case XenbusStateConnected:
		connect(netif);
		break;
	case XenbusStateClosing:
		xenbus_switch_state(xdev, NULL, XenbusStateClosing);
		break;
	case XenbusStateClosed:
		xenbus_remove_device(xdev);
		break;
	case XenbusStateUnknown:
	case XenbusStateInitWait:
		xenbus_dev_fatal(xdev, EINVAL, "saw state %d at frontend",
			frontend_state);
		break;
	}
}

/* ** Driver registration ** */

static struct xenbus_device_id netback_ids[] = {
	{ "vif" },
	{ "" }
};

static struct xenbus_driver netback = {
	.name = "netback",
	.ids = netback_ids,
	.probe = netback_probe,
	.remove = netback_remove,
	.resume = netback_resume,
	.otherend_changed = frontend_changed,
};

static void
netback_init(void *unused)
{
	callout_init(&rx_task_callout, CALLOUT_MPSAFE);

	mmap_vstart = alloc_empty_page_range(MAX_PENDING_REQS);
	BUG_ON(!mmap_vstart);

	pending_cons = 0;
	for (pending_prod = 0; pending_prod < MAX_PENDING_REQS; pending_prod++)
		pending_ring[pending_prod] = pending_prod;

	TASK_INIT(&net_tx_task, 0, net_tx_action, NULL);
	TASK_INIT(&net_rx_task, 0, net_rx_action, NULL);
	mtx_init(&tx_sched_list_lock, "nb_tx_sched_lock", "netback tx sched lock", MTX_DEF);
	mtx_init(&rx_sched_list_lock, "nb_rx_sched_lock", "netback rx sched lock", MTX_DEF);

	DPRINTF("registering %s\n", netback.name);

	xenbus_register_backend(&netback);
}

SYSINIT(xnbedev, SI_SUB_PSEUDO, SI_ORDER_ANY, netback_init, NULL)

static int
vif_add_dev(struct xenbus_device *xdev)
{
	netif_t *netif = xdev->data;
	device_t nexus, ndev;
	devclass_t dc;
	int err = 0;

	mtx_lock(&Giant);

	/* We will add a vif device as a child of nexus0 (for now) */
	if (!(dc = devclass_find("nexus")) ||
		!(nexus = devclass_get_device(dc, 0))) {
		WPRINTF("could not find nexus0!\n");
		err = ENOENT;
		goto done;
	}

	/* Create a newbus device representing the vif */
	ndev = BUS_ADD_CHILD(nexus, 0, "vif", netif->ifp->if_dunit);
	if (!ndev) {
		WPRINTF("could not create newbus device %s!\n", IFNAME(netif));
		err = EFAULT;
		goto done;
	}

	netif_get(netif);
	device_set_ivars(ndev, netif);
	netif->ndev = ndev;

	device_probe_and_attach(ndev);

done:
	mtx_unlock(&Giant);

	return err;
}
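
/*
 * Per-vif sysctl nodes, attached under the newbus device in vif_attach():
 * "domid" and "handle" report the frontend domain and interface handle as
 * integers; "txring" and "rxring" (debug builds only) format a snapshot of
 * the shared ring indices.
 */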
enum {
	VIF_SYSCTL_DOMID,
	VIF_SYSCTL_HANDLE,
	VIF_SYSCTL_TXRING,
	VIF_SYSCTL_RXRING,
};

static char *
vif_sysctl_ring_info(netif_t *netif, int cmd)
{
	char *buf = malloc(256, M_DEVBUF, M_WAITOK);
	if (buf) {
		if (!netif->rings_connected)
			sprintf(buf, "rings not connected\n");
		else if (cmd == VIF_SYSCTL_TXRING) {
			netif_tx_back_ring_t *tx = &netif->tx;
			sprintf(buf, "nr_ents=%x req_cons=%x"
				" req_prod=%x req_event=%x"
				" rsp_prod=%x rsp_event=%x",
				tx->nr_ents, tx->req_cons,
				tx->sring->req_prod, tx->sring->req_event,
				tx->sring->rsp_prod, tx->sring->rsp_event);
		} else {
			netif_rx_back_ring_t *rx = &netif->rx;
			sprintf(buf, "nr_ents=%x req_cons=%x"
				" req_prod=%x req_event=%x"
				" rsp_prod=%x rsp_event=%x",
				rx->nr_ents, rx->req_cons,
				rx->sring->req_prod, rx->sring->req_event,
				rx->sring->rsp_prod, rx->sring->rsp_event);
		}
	}
	return buf;
}

static int
vif_sysctl_handler(SYSCTL_HANDLER_ARGS)
{
	device_t dev = (device_t)arg1;
	netif_t *netif = (netif_t *)device_get_ivars(dev);
	const char *value;
	char *buf = NULL;
	int err;

	switch (arg2) {
	case VIF_SYSCTL_DOMID:
		return sysctl_handle_int(oidp, NULL, netif->domid, req);
	case VIF_SYSCTL_HANDLE:
		return sysctl_handle_int(oidp, NULL, netif->handle, req);
	case VIF_SYSCTL_TXRING:
	case VIF_SYSCTL_RXRING:
		value = buf = vif_sysctl_ring_info(netif, arg2);
		break;
	default:
		return (EINVAL);
	}

	err = SYSCTL_OUT(req, value, strlen(value));
	if (buf != NULL)
		free(buf, M_DEVBUF);

	return err;
}

/* Newbus vif device driver probe */
static int
vif_probe(device_t dev)
{
	DDPRINTF("vif%d\n", device_get_unit(dev));
	return 0;
}

/* Newbus vif device driver attach */
static int
vif_attach(device_t dev)
{
	netif_t *netif = (netif_t *)device_get_ivars(dev);
	uint8_t mac[ETHER_ADDR_LEN];

	DDPRINTF("%s\n", IFNAME(netif));

	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
		OID_AUTO, "domid", CTLTYPE_INT|CTLFLAG_RD,
		dev, VIF_SYSCTL_DOMID, vif_sysctl_handler, "I",
		"domid of frontend");
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
		OID_AUTO, "handle", CTLTYPE_INT|CTLFLAG_RD,
		dev, VIF_SYSCTL_HANDLE, vif_sysctl_handler, "I",
		"handle of frontend");
#ifdef XEN_NETBACK_DEBUG
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
		OID_AUTO, "txring", CTLFLAG_RD,
		dev, VIF_SYSCTL_TXRING, vif_sysctl_handler, "A",
		"tx ring info");
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
		OID_AUTO, "rxring", CTLFLAG_RD,
		dev, VIF_SYSCTL_RXRING, vif_sysctl_handler, "A",
		"rx ring info");
#endif

	memset(mac, 0xff, sizeof(mac));
	mac[0] &= ~0x01;

	ether_ifattach(netif->ifp, mac);
	netif->attached = 1;

	connect(netif);

	if (netif->bridge) {
		DPRINTF("Adding %s to bridge %s\n", IFNAME(netif), netif->bridge);
		int err = add_to_bridge(netif->ifp, netif->bridge);
		if (err) {
			WPRINTF("Error adding %s to %s; err=%d\n",
				IFNAME(netif), netif->bridge, err);
		}
	}

	return bus_generic_attach(dev);
}

/* Newbus vif device driver detach */
static int
vif_detach(device_t dev)
{
	netif_t *netif = (netif_t *)device_get_ivars(dev);
	struct ifnet *ifp = netif->ifp;

	DDPRINTF("%s\n", IFNAME(netif));

	/* Tell the stack that the interface is no longer active */
	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);

	ether_ifdetach(ifp);

	bus_generic_detach(dev);

	netif->attached = 0;

	netif_put(netif);

	return 0;
}

static device_method_t vif_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe, vif_probe),
	DEVMETHOD(device_attach, vif_attach),
	DEVMETHOD(device_detach, vif_detach),
	DEVMETHOD(device_shutdown, bus_generic_shutdown),
	DEVMETHOD(device_suspend, bus_generic_suspend),
	DEVMETHOD(device_resume, bus_generic_resume),
	{0, 0}
};

static devclass_t vif_devclass;

static driver_t vif_driver = {
	"vif",
	vif_methods,
	0,
};

DRIVER_MODULE(vif, nexus, vif_driver, vif_devclass, 0, 0);


/*
 * Local variables:
 * mode: C
 * c-set-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: t
 * End:
 */