1 /* 2 * Copyright (c) 2006, Cisco Systems, Inc. 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 3. Neither the name of Cisco Systems, Inc. nor the names of its contributors 15 * may be used to endorse or promote products derived from this software 16 * without specific prior written permission. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 22 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 23 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 24 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 25 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 26 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 27 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 28 * POSSIBILITY OF SUCH DAMAGE. 29 */ 30 31 #include <sys/cdefs.h> 32 __FBSDID("$FreeBSD$"); 33 #include "opt_sctp.h" 34 35 #include <sys/param.h> 36 #include <sys/systm.h> 37 #include <sys/sockio.h> 38 #include <sys/mbuf.h> 39 #include <sys/malloc.h> 40 #include <sys/kernel.h> 41 #include <sys/socket.h> 42 #include <sys/queue.h> 43 #include <sys/taskqueue.h> 44 45 #include <sys/module.h> 46 #include <sys/bus.h> 47 #include <sys/sysctl.h> 48 49 #include <net/if.h> 50 #include <net/if_arp.h> 51 #include <net/if_types.h> 52 #include <net/ethernet.h> 53 #include <net/if_bridgevar.h> 54 55 #include <netinet/in_systm.h> 56 #include <netinet/in.h> 57 #include <netinet/in_var.h> 58 #include <netinet/ip.h> 59 #include <netinet/tcp.h> 60 #include <netinet/udp.h> 61 #ifdef SCTP 62 #include <netinet/sctp.h> 63 #include <netinet/sctp_crc32.h> 64 #endif 65 66 #include <vm/vm_extern.h> 67 #include <vm/vm_kern.h> 68 69 #include <machine/in_cksum.h> 70 #include <machine/xen-os.h> 71 #include <machine/hypervisor.h> 72 #include <machine/hypervisor-ifs.h> 73 #include <machine/xen_intr.h> 74 #include <machine/evtchn.h> 75 #include <machine/xenbus.h> 76 #include <machine/gnttab.h> 77 #include <machine/xen-public/memory.h> 78 #include <dev/xen/xenbus/xenbus_comms.h> 79 80 81 #ifdef XEN_NETBACK_DEBUG 82 #define DPRINTF(fmt, args...) \ 83 printf("netback (%s:%d): " fmt, __FUNCTION__, __LINE__, ##args) 84 #else 85 #define DPRINTF(fmt, args...) ((void)0) 86 #endif 87 88 #ifdef XEN_NETBACK_DEBUG_LOTS 89 #define DDPRINTF(fmt, args...) \ 90 printf("netback (%s:%d): " fmt, __FUNCTION__, __LINE__, ##args) 91 #define DPRINTF_MBUF(_m) print_mbuf(_m, 0) 92 #define DPRINTF_MBUF_LEN(_m, _len) print_mbuf(_m, _len) 93 #else 94 #define DDPRINTF(fmt, args...) ((void)0) 95 #define DPRINTF_MBUF(_m) ((void)0) 96 #define DPRINTF_MBUF_LEN(_m, _len) ((void)0) 97 #endif 98 99 #define WPRINTF(fmt, args...) \ 100 printf("netback (%s:%d): " fmt, __FUNCTION__, __LINE__, ##args) 101 102 #define ARRAY_SIZE(x) (sizeof(x)/sizeof(x[0])) 103 #define BUG_ON PANIC_IF 104 105 #define IFNAME(_np) (_np)->ifp->if_xname 106 107 #define NET_TX_RING_SIZE __RING_SIZE((netif_tx_sring_t *)0, PAGE_SIZE) 108 #define NET_RX_RING_SIZE __RING_SIZE((netif_rx_sring_t *)0, PAGE_SIZE) 109 110 struct ring_ref { 111 vm_offset_t va; 112 grant_handle_t handle; 113 uint64_t bus_addr; 114 }; 115 116 typedef struct netback_info { 117 118 /* Schedule lists */ 119 STAILQ_ENTRY(netback_info) next_tx; 120 STAILQ_ENTRY(netback_info) next_rx; 121 int on_tx_sched_list; 122 int on_rx_sched_list; 123 124 struct xenbus_device *xdev; 125 XenbusState frontend_state; 126 127 domid_t domid; 128 int handle; 129 char *bridge; 130 131 int rings_connected; 132 struct ring_ref tx_ring_ref; 133 struct ring_ref rx_ring_ref; 134 netif_tx_back_ring_t tx; 135 netif_rx_back_ring_t rx; 136 evtchn_port_t evtchn; 137 int irq; 138 void *irq_cookie; 139 140 struct ifnet *ifp; 141 int ref_cnt; 142 143 device_t ndev; 144 int attached; 145 } netif_t; 146 147 148 #define MAX_PENDING_REQS 256 149 #define PKT_PROT_LEN 64 150 151 static struct { 152 netif_tx_request_t req; 153 netif_t *netif; 154 } pending_tx_info[MAX_PENDING_REQS]; 155 static uint16_t pending_ring[MAX_PENDING_REQS]; 156 typedef unsigned int PEND_RING_IDX; 157 #define MASK_PEND_IDX(_i) ((_i)&(MAX_PENDING_REQS-1)) 158 static PEND_RING_IDX pending_prod, pending_cons; 159 #define NR_PENDING_REQS (MAX_PENDING_REQS - pending_prod + pending_cons) 160 161 static unsigned long mmap_vstart; 162 #define MMAP_VADDR(_req) (mmap_vstart + ((_req) * PAGE_SIZE)) 163 164 /* Freed TX mbufs get batched on this ring before return to pending_ring. */ 165 static uint16_t dealloc_ring[MAX_PENDING_REQS]; 166 static PEND_RING_IDX dealloc_prod, dealloc_cons; 167 168 static multicall_entry_t rx_mcl[NET_RX_RING_SIZE+1]; 169 static mmu_update_t rx_mmu[NET_RX_RING_SIZE]; 170 static gnttab_transfer_t grant_rx_op[NET_RX_RING_SIZE]; 171 172 static grant_handle_t grant_tx_handle[MAX_PENDING_REQS]; 173 static gnttab_unmap_grant_ref_t tx_unmap_ops[MAX_PENDING_REQS]; 174 static gnttab_map_grant_ref_t tx_map_ops[MAX_PENDING_REQS]; 175 176 static struct task net_tx_task, net_rx_task; 177 static struct callout rx_task_callout; 178 179 static STAILQ_HEAD(netback_tx_sched_list, netback_info) tx_sched_list = 180 STAILQ_HEAD_INITIALIZER(tx_sched_list); 181 static STAILQ_HEAD(netback_rx_sched_list, netback_info) rx_sched_list = 182 STAILQ_HEAD_INITIALIZER(rx_sched_list); 183 static struct mtx tx_sched_list_lock; 184 static struct mtx rx_sched_list_lock; 185 186 static int vif_unit_maker = 0; 187 188 /* Protos */ 189 static void netback_start(struct ifnet *ifp); 190 static int netback_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data); 191 static int vif_add_dev(struct xenbus_device *xdev); 192 static void disconnect_rings(netif_t *netif); 193 194 #ifdef XEN_NETBACK_DEBUG_LOTS 195 /* Debug code to display the contents of an mbuf */ 196 static void 197 print_mbuf(struct mbuf *m, int max) 198 { 199 int i, j=0; 200 printf("mbuf %08x len = %d", (unsigned int)m, m->m_pkthdr.len); 201 for (; m; m = m->m_next) { 202 unsigned char *d = m->m_data; 203 for (i=0; i < m->m_len; i++) { 204 if (max && j == max) 205 break; 206 if ((j++ % 16) == 0) 207 printf("\n%04x:", j); 208 printf(" %02x", d[i]); 209 } 210 } 211 printf("\n"); 212 } 213 #endif 214 215 216 #define MAX_MFN_ALLOC 64 217 static unsigned long mfn_list[MAX_MFN_ALLOC]; 218 static unsigned int alloc_index = 0; 219 220 static unsigned long 221 alloc_mfn(void) 222 { 223 unsigned long mfn = 0; 224 struct xen_memory_reservation reservation = { 225 .extent_start = mfn_list, 226 .nr_extents = MAX_MFN_ALLOC, 227 .extent_order = 0, 228 .domid = DOMID_SELF 229 }; 230 if ( unlikely(alloc_index == 0) ) 231 alloc_index = HYPERVISOR_memory_op( 232 XENMEM_increase_reservation, &reservation); 233 if ( alloc_index != 0 ) 234 mfn = mfn_list[--alloc_index]; 235 return mfn; 236 } 237 238 static unsigned long 239 alloc_empty_page_range(unsigned long nr_pages) 240 { 241 void *pages; 242 int i = 0, j = 0; 243 multicall_entry_t mcl[17]; 244 unsigned long mfn_list[16]; 245 struct xen_memory_reservation reservation = { 246 .extent_start = mfn_list, 247 .nr_extents = 0, 248 .address_bits = 0, 249 .extent_order = 0, 250 .domid = DOMID_SELF 251 }; 252 253 pages = malloc(nr_pages*PAGE_SIZE, M_DEVBUF, M_NOWAIT); 254 if (pages == NULL) 255 return 0; 256 257 memset(mcl, 0, sizeof(mcl)); 258 259 while (i < nr_pages) { 260 unsigned long va = (unsigned long)pages + (i++ * PAGE_SIZE); 261 262 mcl[j].op = __HYPERVISOR_update_va_mapping; 263 mcl[j].args[0] = va; 264 265 mfn_list[j++] = vtomach(va) >> PAGE_SHIFT; 266 267 xen_phys_machine[(vtophys(va) >> PAGE_SHIFT)] = INVALID_P2M_ENTRY; 268 269 if (j == 16 || i == nr_pages) { 270 mcl[j-1].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_LOCAL; 271 272 reservation.nr_extents = j; 273 274 mcl[j].op = __HYPERVISOR_memory_op; 275 mcl[j].args[0] = XENMEM_decrease_reservation; 276 mcl[j].args[1] = (unsigned long)&reservation; 277 278 (void)HYPERVISOR_multicall(mcl, j+1); 279 280 mcl[j-1].args[MULTI_UVMFLAGS_INDEX] = 0; 281 j = 0; 282 } 283 } 284 285 return (unsigned long)pages; 286 } 287 288 #ifdef XEN_NETBACK_FIXUP_CSUM 289 static void 290 fixup_checksum(struct mbuf *m) 291 { 292 struct ether_header *eh = mtod(m, struct ether_header *); 293 struct ip *ip = (struct ip *)(eh + 1); 294 int iphlen = ip->ip_hl << 2; 295 int iplen = ntohs(ip->ip_len); 296 297 if ((m->m_pkthdr.csum_flags & CSUM_TCP)) { 298 struct tcphdr *th = (struct tcphdr *)((caddr_t)ip + iphlen); 299 th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, 300 htons(IPPROTO_TCP + (iplen - iphlen))); 301 th->th_sum = in_cksum_skip(m, iplen + sizeof(*eh), sizeof(*eh) + iphlen); 302 m->m_pkthdr.csum_flags &= ~CSUM_TCP; 303 #ifdef SCTP 304 } else if (sw_csum & CSUM_SCTP) { 305 sctp_delayed_cksum(m, iphlen); 306 sw_csum &= ~CSUM_SCTP; 307 #endif 308 } else { 309 u_short csum; 310 struct udphdr *uh = (struct udphdr *)((caddr_t)ip + iphlen); 311 uh->uh_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, 312 htons(IPPROTO_UDP + (iplen - iphlen))); 313 if ((csum = in_cksum_skip(m, iplen + sizeof(*eh), sizeof(*eh) + iphlen)) == 0) 314 csum = 0xffff; 315 uh->uh_sum = csum; 316 m->m_pkthdr.csum_flags &= ~CSUM_UDP; 317 } 318 } 319 #endif 320 321 /* Add the interface to the specified bridge */ 322 static int 323 add_to_bridge(struct ifnet *ifp, char *bridge) 324 { 325 struct ifdrv ifd; 326 struct ifbreq ifb; 327 struct ifnet *ifp_bridge = ifunit(bridge); 328 329 if (!ifp_bridge) 330 return ENOENT; 331 332 bzero(&ifd, sizeof(ifd)); 333 bzero(&ifb, sizeof(ifb)); 334 335 strcpy(ifb.ifbr_ifsname, ifp->if_xname); 336 strcpy(ifd.ifd_name, ifp->if_xname); 337 ifd.ifd_cmd = BRDGADD; 338 ifd.ifd_len = sizeof(ifb); 339 ifd.ifd_data = &ifb; 340 341 return bridge_ioctl_kern(ifp_bridge, SIOCSDRVSPEC, &ifd); 342 343 } 344 345 static int 346 netif_create(int handle, struct xenbus_device *xdev, char *bridge) 347 { 348 netif_t *netif; 349 struct ifnet *ifp; 350 351 netif = (netif_t *)malloc(sizeof(*netif), M_DEVBUF, M_NOWAIT | M_ZERO); 352 if (!netif) 353 return ENOMEM; 354 355 netif->ref_cnt = 1; 356 netif->handle = handle; 357 netif->domid = xdev->otherend_id; 358 netif->xdev = xdev; 359 netif->bridge = bridge; 360 xdev->data = netif; 361 362 /* Set up ifnet structure */ 363 ifp = netif->ifp = if_alloc(IFT_ETHER); 364 if (!ifp) { 365 if (bridge) 366 free(bridge, M_DEVBUF); 367 free(netif, M_DEVBUF); 368 return ENOMEM; 369 } 370 371 ifp->if_softc = netif; 372 if_initname(ifp, "vif", 373 atomic_fetchadd_int(&vif_unit_maker, 1) /* ifno */ ); 374 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX; 375 ifp->if_output = ether_output; 376 ifp->if_start = netback_start; 377 ifp->if_ioctl = netback_ioctl; 378 ifp->if_mtu = ETHERMTU; 379 ifp->if_snd.ifq_maxlen = NET_TX_RING_SIZE - 1; 380 381 DPRINTF("Created %s for domid=%d handle=%d\n", IFNAME(netif), netif->domid, netif->handle); 382 383 return 0; 384 } 385 386 static void 387 netif_get(netif_t *netif) 388 { 389 atomic_add_int(&netif->ref_cnt, 1); 390 } 391 392 static void 393 netif_put(netif_t *netif) 394 { 395 if (atomic_fetchadd_int(&netif->ref_cnt, -1) == 1) { 396 DPRINTF("%s\n", IFNAME(netif)); 397 disconnect_rings(netif); 398 if (netif->ifp) { 399 if_free(netif->ifp); 400 netif->ifp = NULL; 401 } 402 if (netif->bridge) 403 free(netif->bridge, M_DEVBUF); 404 free(netif, M_DEVBUF); 405 } 406 } 407 408 static int 409 netback_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) 410 { 411 switch (cmd) { 412 case SIOCSIFFLAGS: 413 DDPRINTF("%s cmd=SIOCSIFFLAGS flags=%x\n", 414 IFNAME((struct netback_info *)ifp->if_softc), ((struct ifreq *)data)->ifr_flags); 415 return 0; 416 } 417 418 DDPRINTF("%s cmd=%lx\n", IFNAME((struct netback_info *)ifp->if_softc), cmd); 419 420 return ether_ioctl(ifp, cmd, data); 421 } 422 423 static inline void 424 maybe_schedule_tx_action(void) 425 { 426 smp_mb(); 427 if ((NR_PENDING_REQS < (MAX_PENDING_REQS/2)) && !STAILQ_EMPTY(&tx_sched_list)) 428 taskqueue_enqueue(taskqueue_swi, &net_tx_task); 429 } 430 431 /* Removes netif from front of list and does not call netif_put() (caller must) */ 432 static netif_t * 433 remove_from_tx_schedule_list(void) 434 { 435 netif_t *netif; 436 437 mtx_lock(&tx_sched_list_lock); 438 439 if ((netif = STAILQ_FIRST(&tx_sched_list))) { 440 STAILQ_REMOVE(&tx_sched_list, netif, netback_info, next_tx); 441 STAILQ_NEXT(netif, next_tx) = NULL; 442 netif->on_tx_sched_list = 0; 443 } 444 445 mtx_unlock(&tx_sched_list_lock); 446 447 return netif; 448 } 449 450 /* Adds netif to end of list and calls netif_get() */ 451 static void 452 add_to_tx_schedule_list_tail(netif_t *netif) 453 { 454 if (netif->on_tx_sched_list) 455 return; 456 457 mtx_lock(&tx_sched_list_lock); 458 if (!netif->on_tx_sched_list && (netif->ifp->if_drv_flags & IFF_DRV_RUNNING)) { 459 netif_get(netif); 460 STAILQ_INSERT_TAIL(&tx_sched_list, netif, next_tx); 461 netif->on_tx_sched_list = 1; 462 } 463 mtx_unlock(&tx_sched_list_lock); 464 } 465 466 /* 467 * Note on CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER: 468 * If this driver is pipelining transmit requests then we can be very 469 * aggressive in avoiding new-packet notifications -- frontend only needs to 470 * send a notification if there are no outstanding unreceived responses. 471 * If we may be buffer transmit buffers for any reason then we must be rather 472 * more conservative and treat this as the final check for pending work. 473 */ 474 static void 475 netif_schedule_tx_work(netif_t *netif) 476 { 477 int more_to_do; 478 479 #ifdef CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER 480 more_to_do = RING_HAS_UNCONSUMED_REQUESTS(&netif->tx); 481 #else 482 RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, more_to_do); 483 #endif 484 485 if (more_to_do) { 486 DDPRINTF("Adding %s to tx sched list\n", IFNAME(netif)); 487 add_to_tx_schedule_list_tail(netif); 488 maybe_schedule_tx_action(); 489 } 490 } 491 492 static struct mtx dealloc_lock; 493 MTX_SYSINIT(netback_dealloc, &dealloc_lock, "DEALLOC LOCK", MTX_SPIN | MTX_NOWITNESS); 494 495 static void 496 netif_idx_release(uint16_t pending_idx) 497 { 498 mtx_lock_spin(&dealloc_lock); 499 dealloc_ring[MASK_PEND_IDX(dealloc_prod++)] = pending_idx; 500 mtx_unlock_spin(&dealloc_lock); 501 502 taskqueue_enqueue(taskqueue_swi, &net_tx_task); 503 } 504 505 static void 506 make_tx_response(netif_t *netif, 507 uint16_t id, 508 int8_t st) 509 { 510 RING_IDX i = netif->tx.rsp_prod_pvt; 511 netif_tx_response_t *resp; 512 int notify; 513 514 resp = RING_GET_RESPONSE(&netif->tx, i); 515 resp->id = id; 516 resp->status = st; 517 518 netif->tx.rsp_prod_pvt = ++i; 519 RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->tx, notify); 520 if (notify) 521 notify_remote_via_irq(netif->irq); 522 523 #ifdef CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER 524 if (i == netif->tx.req_cons) { 525 int more_to_do; 526 RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, more_to_do); 527 if (more_to_do) 528 add_to_tx_schedule_list_tail(netif); 529 } 530 #endif 531 } 532 533 inline static void 534 net_tx_action_dealloc(void) 535 { 536 gnttab_unmap_grant_ref_t *gop; 537 uint16_t pending_idx; 538 PEND_RING_IDX dc, dp; 539 netif_t *netif; 540 int ret; 541 542 dc = dealloc_cons; 543 dp = dealloc_prod; 544 545 /* 546 * Free up any grants we have finished using 547 */ 548 gop = tx_unmap_ops; 549 while (dc != dp) { 550 pending_idx = dealloc_ring[MASK_PEND_IDX(dc++)]; 551 gop->host_addr = MMAP_VADDR(pending_idx); 552 gop->dev_bus_addr = 0; 553 gop->handle = grant_tx_handle[pending_idx]; 554 gop++; 555 } 556 ret = HYPERVISOR_grant_table_op( 557 GNTTABOP_unmap_grant_ref, tx_unmap_ops, gop - tx_unmap_ops); 558 BUG_ON(ret); 559 560 while (dealloc_cons != dp) { 561 pending_idx = dealloc_ring[MASK_PEND_IDX(dealloc_cons++)]; 562 563 netif = pending_tx_info[pending_idx].netif; 564 565 make_tx_response(netif, pending_tx_info[pending_idx].req.id, 566 NETIF_RSP_OKAY); 567 568 pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx; 569 570 netif_put(netif); 571 } 572 } 573 574 static void 575 netif_page_release(void *buf, void *args) 576 { 577 uint16_t pending_idx = (unsigned int)args; 578 579 DDPRINTF("pending_idx=%u\n", pending_idx); 580 581 KASSERT(pending_idx < MAX_PENDING_REQS, ("%s: bad index %u", __func__, pending_idx)); 582 583 netif_idx_release(pending_idx); 584 } 585 586 static void 587 net_tx_action(void *context, int pending) 588 { 589 struct mbuf *m; 590 netif_t *netif; 591 netif_tx_request_t txreq; 592 uint16_t pending_idx; 593 RING_IDX i; 594 gnttab_map_grant_ref_t *mop; 595 int ret, work_to_do; 596 struct mbuf *txq = NULL, *txq_last = NULL; 597 598 if (dealloc_cons != dealloc_prod) 599 net_tx_action_dealloc(); 600 601 mop = tx_map_ops; 602 while ((NR_PENDING_REQS < MAX_PENDING_REQS) && !STAILQ_EMPTY(&tx_sched_list)) { 603 604 /* Get a netif from the list with work to do. */ 605 netif = remove_from_tx_schedule_list(); 606 607 DDPRINTF("Processing %s (prod=%u, cons=%u)\n", 608 IFNAME(netif), netif->tx.sring->req_prod, netif->tx.req_cons); 609 610 RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, work_to_do); 611 if (!work_to_do) { 612 netif_put(netif); 613 continue; 614 } 615 616 i = netif->tx.req_cons; 617 rmb(); /* Ensure that we see the request before we copy it. */ 618 memcpy(&txreq, RING_GET_REQUEST(&netif->tx, i), sizeof(txreq)); 619 620 /* If we want credit-based scheduling, coud add it here - WORK */ 621 622 netif->tx.req_cons++; 623 624 netif_schedule_tx_work(netif); 625 626 if (unlikely(txreq.size < ETHER_HDR_LEN) || 627 unlikely(txreq.size > (ETHER_MAX_LEN-ETHER_CRC_LEN))) { 628 WPRINTF("Bad packet size: %d\n", txreq.size); 629 make_tx_response(netif, txreq.id, NETIF_RSP_ERROR); 630 netif_put(netif); 631 continue; 632 } 633 634 /* No crossing a page as the payload mustn't fragment. */ 635 if (unlikely((txreq.offset + txreq.size) >= PAGE_SIZE)) { 636 WPRINTF("txreq.offset: %x, size: %u, end: %u\n", 637 txreq.offset, txreq.size, 638 (txreq.offset & PAGE_MASK) + txreq.size); 639 make_tx_response(netif, txreq.id, NETIF_RSP_ERROR); 640 netif_put(netif); 641 continue; 642 } 643 644 pending_idx = pending_ring[MASK_PEND_IDX(pending_cons)]; 645 646 MGETHDR(m, M_DONTWAIT, MT_DATA); 647 if (!m) { 648 WPRINTF("Failed to allocate mbuf\n"); 649 make_tx_response(netif, txreq.id, NETIF_RSP_ERROR); 650 netif_put(netif); 651 break; 652 } 653 m->m_pkthdr.rcvif = netif->ifp; 654 655 if ((m->m_pkthdr.len = txreq.size) > PKT_PROT_LEN) { 656 struct mbuf *n; 657 MGET(n, M_DONTWAIT, MT_DATA); 658 if (!(m->m_next = n)) { 659 m_freem(m); 660 WPRINTF("Failed to allocate second mbuf\n"); 661 make_tx_response(netif, txreq.id, NETIF_RSP_ERROR); 662 netif_put(netif); 663 break; 664 } 665 n->m_len = txreq.size - PKT_PROT_LEN; 666 m->m_len = PKT_PROT_LEN; 667 } else 668 m->m_len = txreq.size; 669 670 mop->host_addr = MMAP_VADDR(pending_idx); 671 mop->dom = netif->domid; 672 mop->ref = txreq.gref; 673 mop->flags = GNTMAP_host_map | GNTMAP_readonly; 674 mop++; 675 676 memcpy(&pending_tx_info[pending_idx].req, 677 &txreq, sizeof(txreq)); 678 pending_tx_info[pending_idx].netif = netif; 679 *((uint16_t *)m->m_data) = pending_idx; 680 681 if (txq_last) 682 txq_last->m_nextpkt = m; 683 else 684 txq = m; 685 txq_last = m; 686 687 pending_cons++; 688 689 if ((mop - tx_map_ops) >= ARRAY_SIZE(tx_map_ops)) 690 break; 691 } 692 693 if (!txq) 694 return; 695 696 ret = HYPERVISOR_grant_table_op( 697 GNTTABOP_map_grant_ref, tx_map_ops, mop - tx_map_ops); 698 BUG_ON(ret); 699 700 mop = tx_map_ops; 701 while ((m = txq) != NULL) { 702 caddr_t data; 703 704 txq = m->m_nextpkt; 705 m->m_nextpkt = NULL; 706 707 pending_idx = *((uint16_t *)m->m_data); 708 netif = pending_tx_info[pending_idx].netif; 709 memcpy(&txreq, &pending_tx_info[pending_idx].req, sizeof(txreq)); 710 711 /* Check the remap error code. */ 712 if (unlikely(mop->status)) { 713 WPRINTF("#### netback grant fails\n"); 714 make_tx_response(netif, txreq.id, NETIF_RSP_ERROR); 715 netif_put(netif); 716 m_freem(m); 717 mop++; 718 pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx; 719 continue; 720 } 721 722 #if 0 723 /* Can't do this in FreeBSD since vtophys() returns the pfn */ 724 /* of the remote domain who loaned us the machine page - DPT */ 725 xen_phys_machine[(vtophys(MMAP_VADDR(pending_idx)) >> PAGE_SHIFT)] = 726 mop->dev_bus_addr >> PAGE_SHIFT; 727 #endif 728 grant_tx_handle[pending_idx] = mop->handle; 729 730 /* Setup data in mbuf (lengths are already set) */ 731 data = (caddr_t)(MMAP_VADDR(pending_idx)|txreq.offset); 732 bcopy(data, m->m_data, m->m_len); 733 if (m->m_next) { 734 struct mbuf *n = m->m_next; 735 MEXTADD(n, MMAP_VADDR(pending_idx), PAGE_SIZE, netif_page_release, 736 (void *)(unsigned int)pending_idx, M_RDONLY, EXT_NET_DRV); 737 n->m_data = &data[PKT_PROT_LEN]; 738 } else { 739 /* Schedule a response immediately. */ 740 netif_idx_release(pending_idx); 741 } 742 743 if ((txreq.flags & NETTXF_data_validated)) { 744 /* Tell the stack the checksums are okay */ 745 m->m_pkthdr.csum_flags |= 746 (CSUM_IP_CHECKED | CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR); 747 m->m_pkthdr.csum_data = 0xffff; 748 } 749 750 /* If necessary, inform stack to compute the checksums if it forwards the packet */ 751 if ((txreq.flags & NETTXF_csum_blank)) { 752 struct ether_header *eh = mtod(m, struct ether_header *); 753 if (ntohs(eh->ether_type) == ETHERTYPE_IP) { 754 struct ip *ip = (struct ip *)&m->m_data[14]; 755 if (ip->ip_p == IPPROTO_TCP) 756 m->m_pkthdr.csum_flags |= CSUM_TCP; 757 else if (ip->ip_p == IPPROTO_UDP) 758 m->m_pkthdr.csum_flags |= CSUM_UDP; 759 } 760 } 761 762 netif->ifp->if_ibytes += m->m_pkthdr.len; 763 netif->ifp->if_ipackets++; 764 765 DDPRINTF("RECV %d bytes from %s (cflags=%x)\n", 766 m->m_pkthdr.len, IFNAME(netif), m->m_pkthdr.csum_flags); 767 DPRINTF_MBUF_LEN(m, 128); 768 769 (*netif->ifp->if_input)(netif->ifp, m); 770 771 mop++; 772 } 773 } 774 775 /* Handle interrupt from a frontend */ 776 static void 777 netback_intr(void *arg) 778 { 779 netif_t *netif = arg; 780 DDPRINTF("%s\n", IFNAME(netif)); 781 add_to_tx_schedule_list_tail(netif); 782 maybe_schedule_tx_action(); 783 } 784 785 /* Removes netif from front of list and does not call netif_put() (caller must) */ 786 static netif_t * 787 remove_from_rx_schedule_list(void) 788 { 789 netif_t *netif; 790 791 mtx_lock(&rx_sched_list_lock); 792 793 if ((netif = STAILQ_FIRST(&rx_sched_list))) { 794 STAILQ_REMOVE(&rx_sched_list, netif, netback_info, next_rx); 795 STAILQ_NEXT(netif, next_rx) = NULL; 796 netif->on_rx_sched_list = 0; 797 } 798 799 mtx_unlock(&rx_sched_list_lock); 800 801 return netif; 802 } 803 804 /* Adds netif to end of list and calls netif_get() */ 805 static void 806 add_to_rx_schedule_list_tail(netif_t *netif) 807 { 808 if (netif->on_rx_sched_list) 809 return; 810 811 mtx_lock(&rx_sched_list_lock); 812 if (!netif->on_rx_sched_list && (netif->ifp->if_drv_flags & IFF_DRV_RUNNING)) { 813 netif_get(netif); 814 STAILQ_INSERT_TAIL(&rx_sched_list, netif, next_rx); 815 netif->on_rx_sched_list = 1; 816 } 817 mtx_unlock(&rx_sched_list_lock); 818 } 819 820 static int 821 make_rx_response(netif_t *netif, uint16_t id, int8_t st, 822 uint16_t offset, uint16_t size, uint16_t flags) 823 { 824 RING_IDX i = netif->rx.rsp_prod_pvt; 825 netif_rx_response_t *resp; 826 int notify; 827 828 resp = RING_GET_RESPONSE(&netif->rx, i); 829 resp->offset = offset; 830 resp->flags = flags; 831 resp->id = id; 832 resp->status = (int16_t)size; 833 if (st < 0) 834 resp->status = (int16_t)st; 835 836 DDPRINTF("rx resp(%d): off=%x fl=%x id=%x stat=%d\n", 837 i, resp->offset, resp->flags, resp->id, resp->status); 838 839 netif->rx.rsp_prod_pvt = ++i; 840 RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->rx, notify); 841 842 return notify; 843 } 844 845 static int 846 netif_rx(netif_t *netif) 847 { 848 struct ifnet *ifp = netif->ifp; 849 struct mbuf *m; 850 multicall_entry_t *mcl; 851 mmu_update_t *mmu; 852 gnttab_transfer_t *gop; 853 unsigned long vdata, old_mfn, new_mfn; 854 struct mbuf *rxq = NULL, *rxq_last = NULL; 855 int ret, notify = 0, pkts_dequeued = 0; 856 857 DDPRINTF("%s\n", IFNAME(netif)); 858 859 mcl = rx_mcl; 860 mmu = rx_mmu; 861 gop = grant_rx_op; 862 863 while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) { 864 865 /* Quit if the target domain has no receive buffers */ 866 if (netif->rx.req_cons == netif->rx.sring->req_prod) 867 break; 868 869 IFQ_DRV_DEQUEUE(&ifp->if_snd, m); 870 if (m == NULL) 871 break; 872 873 pkts_dequeued++; 874 875 /* Check if we need to copy the data */ 876 if (((m->m_flags & (M_RDONLY|M_EXT)) != M_EXT) || 877 (*m->m_ext.ref_cnt > 1) || m->m_next != NULL) { 878 struct mbuf *n; 879 880 DDPRINTF("copying mbuf (fl=%x ext=%x rc=%d n=%x)\n", 881 m->m_flags, 882 (m->m_flags & M_EXT) ? m->m_ext.ext_type : 0, 883 (m->m_flags & M_EXT) ? *m->m_ext.ref_cnt : 0, 884 (unsigned int)m->m_next); 885 886 /* Make copy */ 887 MGETHDR(n, M_DONTWAIT, MT_DATA); 888 if (!n) 889 goto drop; 890 891 MCLGET(n, M_DONTWAIT); 892 if (!(n->m_flags & M_EXT)) { 893 m_freem(n); 894 goto drop; 895 } 896 897 /* Leave space at front and keep current alignment */ 898 n->m_data += 16 + ((unsigned int)m->m_data & 0x3); 899 900 if (m->m_pkthdr.len > M_TRAILINGSPACE(n)) { 901 WPRINTF("pkt to big %d\n", m->m_pkthdr.len); 902 m_freem(n); 903 goto drop; 904 } 905 m_copydata(m, 0, m->m_pkthdr.len, n->m_data); 906 n->m_pkthdr.len = n->m_len = m->m_pkthdr.len; 907 n->m_pkthdr.csum_flags = (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA); 908 m_freem(m); 909 m = n; 910 } 911 912 vdata = (unsigned long)m->m_data; 913 old_mfn = vtomach(vdata) >> PAGE_SHIFT; 914 915 if ((new_mfn = alloc_mfn()) == 0) 916 goto drop; 917 918 #ifdef XEN_NETBACK_FIXUP_CSUM 919 /* Check if we need to compute a checksum. This happens */ 920 /* when bridging from one domain to another. */ 921 if ((m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) || 922 (m->m_pkthdr.csum_flags & CSUM_SCTP)) 923 fixup_checksum(m); 924 #endif 925 926 xen_phys_machine[(vtophys(vdata) >> PAGE_SHIFT)] = new_mfn; 927 928 mcl->op = __HYPERVISOR_update_va_mapping; 929 mcl->args[0] = vdata; 930 mcl->args[1] = (new_mfn << PAGE_SHIFT) | PG_V | PG_RW | PG_M | PG_A; 931 mcl->args[2] = 0; 932 mcl->args[3] = 0; 933 mcl++; 934 935 gop->mfn = old_mfn; 936 gop->domid = netif->domid; 937 gop->ref = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons)->gref; 938 netif->rx.req_cons++; 939 gop++; 940 941 mmu->ptr = (new_mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE; 942 mmu->val = vtophys(vdata) >> PAGE_SHIFT; 943 mmu++; 944 945 if (rxq_last) 946 rxq_last->m_nextpkt = m; 947 else 948 rxq = m; 949 rxq_last = m; 950 951 DDPRINTF("XMIT %d bytes to %s\n", m->m_pkthdr.len, IFNAME(netif)); 952 DPRINTF_MBUF_LEN(m, 128); 953 954 /* Filled the batch queue? */ 955 if ((gop - grant_rx_op) == ARRAY_SIZE(grant_rx_op)) 956 break; 957 958 continue; 959 drop: 960 DDPRINTF("dropping pkt\n"); 961 ifp->if_oerrors++; 962 m_freem(m); 963 } 964 965 if (mcl == rx_mcl) 966 return pkts_dequeued; 967 968 mcl->op = __HYPERVISOR_mmu_update; 969 mcl->args[0] = (unsigned long)rx_mmu; 970 mcl->args[1] = mmu - rx_mmu; 971 mcl->args[2] = 0; 972 mcl->args[3] = DOMID_SELF; 973 mcl++; 974 975 mcl[-2].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_ALL; 976 ret = HYPERVISOR_multicall(rx_mcl, mcl - rx_mcl); 977 BUG_ON(ret != 0); 978 979 ret = HYPERVISOR_grant_table_op(GNTTABOP_transfer, grant_rx_op, gop - grant_rx_op); 980 BUG_ON(ret != 0); 981 982 mcl = rx_mcl; 983 gop = grant_rx_op; 984 985 while ((m = rxq) != NULL) { 986 int8_t status; 987 uint16_t id, flags = 0; 988 989 rxq = m->m_nextpkt; 990 m->m_nextpkt = NULL; 991 992 /* Rederive the machine addresses. */ 993 new_mfn = mcl->args[1] >> PAGE_SHIFT; 994 old_mfn = gop->mfn; 995 996 ifp->if_obytes += m->m_pkthdr.len; 997 ifp->if_opackets++; 998 999 /* The update_va_mapping() must not fail. */ 1000 BUG_ON(mcl->result != 0); 1001 1002 /* Setup flags */ 1003 if ((m->m_pkthdr.csum_flags & CSUM_DELAY_DATA)) 1004 flags |= NETRXF_csum_blank | NETRXF_data_validated; 1005 else if ((m->m_pkthdr.csum_flags & CSUM_DATA_VALID)) 1006 flags |= NETRXF_data_validated; 1007 1008 /* Check the reassignment error code. */ 1009 status = NETIF_RSP_OKAY; 1010 if (gop->status != 0) { 1011 DPRINTF("Bad status %d from grant transfer to DOM%u\n", 1012 gop->status, netif->domid); 1013 /* 1014 * Page no longer belongs to us unless GNTST_bad_page, 1015 * but that should be a fatal error anyway. 1016 */ 1017 BUG_ON(gop->status == GNTST_bad_page); 1018 status = NETIF_RSP_ERROR; 1019 } 1020 id = RING_GET_REQUEST(&netif->rx, netif->rx.rsp_prod_pvt)->id; 1021 notify |= make_rx_response(netif, id, status, 1022 (unsigned long)m->m_data & PAGE_MASK, 1023 m->m_pkthdr.len, flags); 1024 1025 m_freem(m); 1026 mcl++; 1027 gop++; 1028 } 1029 1030 if (notify) 1031 notify_remote_via_irq(netif->irq); 1032 1033 return pkts_dequeued; 1034 } 1035 1036 static void 1037 rx_task_timer(void *arg) 1038 { 1039 DDPRINTF("\n"); 1040 taskqueue_enqueue(taskqueue_swi, &net_rx_task); 1041 } 1042 1043 static void 1044 net_rx_action(void *context, int pending) 1045 { 1046 netif_t *netif, *last_zero_work = NULL; 1047 1048 DDPRINTF("\n"); 1049 1050 while ((netif = remove_from_rx_schedule_list())) { 1051 struct ifnet *ifp = netif->ifp; 1052 1053 if (netif == last_zero_work) { 1054 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) 1055 add_to_rx_schedule_list_tail(netif); 1056 netif_put(netif); 1057 if (!STAILQ_EMPTY(&rx_sched_list)) 1058 callout_reset(&rx_task_callout, 1, rx_task_timer, NULL); 1059 break; 1060 } 1061 1062 if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) { 1063 if (netif_rx(netif)) 1064 last_zero_work = NULL; 1065 else if (!last_zero_work) 1066 last_zero_work = netif; 1067 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) 1068 add_to_rx_schedule_list_tail(netif); 1069 } 1070 1071 netif_put(netif); 1072 } 1073 } 1074 1075 static void 1076 netback_start(struct ifnet *ifp) 1077 { 1078 netif_t *netif = (netif_t *)ifp->if_softc; 1079 1080 DDPRINTF("%s\n", IFNAME(netif)); 1081 1082 add_to_rx_schedule_list_tail(netif); 1083 taskqueue_enqueue(taskqueue_swi, &net_rx_task); 1084 } 1085 1086 /* Map a grant ref to a ring */ 1087 static int 1088 map_ring(grant_ref_t ref, domid_t dom, struct ring_ref *ring) 1089 { 1090 struct gnttab_map_grant_ref op; 1091 1092 ring->va = kmem_alloc_nofault(kernel_map, PAGE_SIZE); 1093 if (ring->va == 0) 1094 return ENOMEM; 1095 1096 op.host_addr = ring->va; 1097 op.flags = GNTMAP_host_map; 1098 op.ref = ref; 1099 op.dom = dom; 1100 HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1); 1101 if (op.status) { 1102 WPRINTF("grant table op err=%d\n", op.status); 1103 kmem_free(kernel_map, ring->va, PAGE_SIZE); 1104 ring->va = 0; 1105 return EACCES; 1106 } 1107 1108 ring->handle = op.handle; 1109 ring->bus_addr = op.dev_bus_addr; 1110 1111 return 0; 1112 } 1113 1114 /* Unmap grant ref for a ring */ 1115 static void 1116 unmap_ring(struct ring_ref *ring) 1117 { 1118 struct gnttab_unmap_grant_ref op; 1119 1120 op.host_addr = ring->va; 1121 op.dev_bus_addr = ring->bus_addr; 1122 op.handle = ring->handle; 1123 HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1); 1124 if (op.status) 1125 WPRINTF("grant table op err=%d\n", op.status); 1126 1127 kmem_free(kernel_map, ring->va, PAGE_SIZE); 1128 ring->va = 0; 1129 } 1130 1131 static int 1132 connect_rings(netif_t *netif) 1133 { 1134 struct xenbus_device *xdev = netif->xdev; 1135 netif_tx_sring_t *txs; 1136 netif_rx_sring_t *rxs; 1137 unsigned long tx_ring_ref, rx_ring_ref; 1138 evtchn_port_t evtchn; 1139 evtchn_op_t op = { .cmd = EVTCHNOP_bind_interdomain }; 1140 int err; 1141 1142 // Grab FE data and map his memory 1143 err = xenbus_gather(NULL, xdev->otherend, 1144 "tx-ring-ref", "%lu", &tx_ring_ref, 1145 "rx-ring-ref", "%lu", &rx_ring_ref, 1146 "event-channel", "%u", &evtchn, NULL); 1147 if (err) { 1148 xenbus_dev_fatal(xdev, err, 1149 "reading %s/ring-ref and event-channel", 1150 xdev->otherend); 1151 return err; 1152 } 1153 1154 err = map_ring(tx_ring_ref, netif->domid, &netif->tx_ring_ref); 1155 if (err) { 1156 xenbus_dev_fatal(xdev, err, "mapping tx ring"); 1157 return err; 1158 } 1159 txs = (netif_tx_sring_t *)netif->tx_ring_ref.va; 1160 BACK_RING_INIT(&netif->tx, txs, PAGE_SIZE); 1161 1162 err = map_ring(rx_ring_ref, netif->domid, &netif->rx_ring_ref); 1163 if (err) { 1164 unmap_ring(&netif->tx_ring_ref); 1165 xenbus_dev_fatal(xdev, err, "mapping rx ring"); 1166 return err; 1167 } 1168 rxs = (netif_rx_sring_t *)netif->rx_ring_ref.va; 1169 BACK_RING_INIT(&netif->rx, rxs, PAGE_SIZE); 1170 1171 op.u.bind_interdomain.remote_dom = netif->domid; 1172 op.u.bind_interdomain.remote_port = evtchn; 1173 err = HYPERVISOR_event_channel_op(&op); 1174 if (err) { 1175 unmap_ring(&netif->tx_ring_ref); 1176 unmap_ring(&netif->rx_ring_ref); 1177 xenbus_dev_fatal(xdev, err, "binding event channel"); 1178 return err; 1179 } 1180 netif->evtchn = op.u.bind_interdomain.local_port; 1181 1182 /* bind evtchn to irq handler */ 1183 netif->irq = 1184 bind_evtchn_to_irqhandler(netif->evtchn, "netback", 1185 netback_intr, netif, INTR_TYPE_NET|INTR_MPSAFE, &netif->irq_cookie); 1186 1187 netif->rings_connected = 1; 1188 1189 DPRINTF("%s connected! evtchn=%d irq=%d\n", 1190 IFNAME(netif), netif->evtchn, netif->irq); 1191 1192 return 0; 1193 } 1194 1195 static void 1196 disconnect_rings(netif_t *netif) 1197 { 1198 DPRINTF("\n"); 1199 1200 if (netif->rings_connected) { 1201 unbind_from_irqhandler(netif->irq, netif->irq_cookie); 1202 netif->irq = 0; 1203 unmap_ring(&netif->tx_ring_ref); 1204 unmap_ring(&netif->rx_ring_ref); 1205 netif->rings_connected = 0; 1206 } 1207 } 1208 1209 static void 1210 connect(netif_t *netif) 1211 { 1212 if (!netif->xdev || 1213 !netif->attached || 1214 netif->frontend_state != XenbusStateConnected) { 1215 return; 1216 } 1217 1218 if (!connect_rings(netif)) { 1219 xenbus_switch_state(netif->xdev, NULL, XenbusStateConnected); 1220 1221 /* Turn on interface */ 1222 netif->ifp->if_drv_flags |= IFF_DRV_RUNNING; 1223 netif->ifp->if_flags |= IFF_UP; 1224 } 1225 } 1226 1227 static int 1228 netback_remove(struct xenbus_device *xdev) 1229 { 1230 netif_t *netif = xdev->data; 1231 device_t ndev; 1232 1233 DPRINTF("remove %s\n", xdev->nodename); 1234 1235 if ((ndev = netif->ndev)) { 1236 netif->ndev = NULL; 1237 mtx_lock(&Giant); 1238 device_detach(ndev); 1239 mtx_unlock(&Giant); 1240 } 1241 1242 xdev->data = NULL; 1243 netif->xdev = NULL; 1244 netif_put(netif); 1245 1246 return 0; 1247 } 1248 1249 /** 1250 * Entry point to this code when a new device is created. Allocate the basic 1251 * structures and the ring buffers for communication with the frontend. 1252 * Switch to Connected state. 1253 */ 1254 static int 1255 netback_probe(struct xenbus_device *xdev, const struct xenbus_device_id *id) 1256 { 1257 int err; 1258 long handle; 1259 char *bridge; 1260 1261 DPRINTF("node=%s\n", xdev->nodename); 1262 1263 /* Grab the handle */ 1264 err = xenbus_scanf(NULL, xdev->nodename, "handle", "%li", &handle); 1265 if (err != 1) { 1266 xenbus_dev_fatal(xdev, err, "reading handle"); 1267 return err; 1268 } 1269 1270 /* Check for bridge */ 1271 bridge = xenbus_read(NULL, xdev->nodename, "bridge", NULL); 1272 if (IS_ERR(bridge)) 1273 bridge = NULL; 1274 1275 err = xenbus_switch_state(xdev, NULL, XenbusStateInitWait); 1276 if (err) { 1277 xenbus_dev_fatal(xdev, err, "writing switch state"); 1278 return err; 1279 } 1280 1281 err = netif_create(handle, xdev, bridge); 1282 if (err) { 1283 xenbus_dev_fatal(xdev, err, "creating netif"); 1284 return err; 1285 } 1286 1287 err = vif_add_dev(xdev); 1288 if (err) { 1289 netif_put((netif_t *)xdev->data); 1290 xenbus_dev_fatal(xdev, err, "adding vif device"); 1291 return err; 1292 } 1293 1294 return 0; 1295 } 1296 1297 /** 1298 * We are reconnecting to the backend, due to a suspend/resume, or a backend 1299 * driver restart. We tear down our netif structure and recreate it, but 1300 * leave the device-layer structures intact so that this is transparent to the 1301 * rest of the kernel. 1302 */ 1303 static int netback_resume(struct xenbus_device *xdev) 1304 { 1305 DPRINTF("node=%s\n", xdev->nodename); 1306 return 0; 1307 } 1308 1309 1310 /** 1311 * Callback received when the frontend's state changes. 1312 */ 1313 static void frontend_changed(struct xenbus_device *xdev, 1314 XenbusState frontend_state) 1315 { 1316 netif_t *netif = xdev->data; 1317 1318 DPRINTF("state=%d\n", frontend_state); 1319 1320 netif->frontend_state = frontend_state; 1321 1322 switch (frontend_state) { 1323 case XenbusStateInitialising: 1324 case XenbusStateInitialised: 1325 break; 1326 case XenbusStateConnected: 1327 connect(netif); 1328 break; 1329 case XenbusStateClosing: 1330 xenbus_switch_state(xdev, NULL, XenbusStateClosing); 1331 break; 1332 case XenbusStateClosed: 1333 xenbus_remove_device(xdev); 1334 break; 1335 case XenbusStateUnknown: 1336 case XenbusStateInitWait: 1337 xenbus_dev_fatal(xdev, EINVAL, "saw state %d at frontend", 1338 frontend_state); 1339 break; 1340 } 1341 } 1342 1343 /* ** Driver registration ** */ 1344 1345 static struct xenbus_device_id netback_ids[] = { 1346 { "vif" }, 1347 { "" } 1348 }; 1349 1350 static struct xenbus_driver netback = { 1351 .name = "netback", 1352 .ids = netback_ids, 1353 .probe = netback_probe, 1354 .remove = netback_remove, 1355 .resume= netback_resume, 1356 .otherend_changed = frontend_changed, 1357 }; 1358 1359 static void 1360 netback_init(void *unused) 1361 { 1362 callout_init(&rx_task_callout, CALLOUT_MPSAFE); 1363 1364 mmap_vstart = alloc_empty_page_range(MAX_PENDING_REQS); 1365 BUG_ON(!mmap_vstart); 1366 1367 pending_cons = 0; 1368 for (pending_prod = 0; pending_prod < MAX_PENDING_REQS; pending_prod++) 1369 pending_ring[pending_prod] = pending_prod; 1370 1371 TASK_INIT(&net_tx_task, 0, net_tx_action, NULL); 1372 TASK_INIT(&net_rx_task, 0, net_rx_action, NULL); 1373 mtx_init(&tx_sched_list_lock, "nb_tx_sched_lock", "netback tx sched lock", MTX_DEF); 1374 mtx_init(&rx_sched_list_lock, "nb_rx_sched_lock", "netback rx sched lock", MTX_DEF); 1375 1376 DPRINTF("registering %s\n", netback.name); 1377 1378 xenbus_register_backend(&netback); 1379 } 1380 1381 SYSINIT(xnbedev, SI_SUB_PSEUDO, SI_ORDER_ANY, netback_init, NULL) 1382 1383 static int 1384 vif_add_dev(struct xenbus_device *xdev) 1385 { 1386 netif_t *netif = xdev->data; 1387 device_t nexus, ndev; 1388 devclass_t dc; 1389 int err = 0; 1390 1391 mtx_lock(&Giant); 1392 1393 /* We will add a vif device as a child of nexus0 (for now) */ 1394 if (!(dc = devclass_find("nexus")) || 1395 !(nexus = devclass_get_device(dc, 0))) { 1396 WPRINTF("could not find nexus0!\n"); 1397 err = ENOENT; 1398 goto done; 1399 } 1400 1401 1402 /* Create a newbus device representing the vif */ 1403 ndev = BUS_ADD_CHILD(nexus, 0, "vif", netif->ifp->if_dunit); 1404 if (!ndev) { 1405 WPRINTF("could not create newbus device %s!\n", IFNAME(netif)); 1406 err = EFAULT; 1407 goto done; 1408 } 1409 1410 netif_get(netif); 1411 device_set_ivars(ndev, netif); 1412 netif->ndev = ndev; 1413 1414 device_probe_and_attach(ndev); 1415 1416 done: 1417 1418 mtx_unlock(&Giant); 1419 1420 return err; 1421 } 1422 1423 enum { 1424 VIF_SYSCTL_DOMID, 1425 VIF_SYSCTL_HANDLE, 1426 VIF_SYSCTL_TXRING, 1427 VIF_SYSCTL_RXRING, 1428 }; 1429 1430 static char * 1431 vif_sysctl_ring_info(netif_t *netif, int cmd) 1432 { 1433 char *buf = malloc(256, M_DEVBUF, M_WAITOK); 1434 if (buf) { 1435 if (!netif->rings_connected) 1436 sprintf(buf, "rings not connected\n"); 1437 else if (cmd == VIF_SYSCTL_TXRING) { 1438 netif_tx_back_ring_t *tx = &netif->tx; 1439 sprintf(buf, "nr_ents=%x req_cons=%x" 1440 " req_prod=%x req_event=%x" 1441 " rsp_prod=%x rsp_event=%x", 1442 tx->nr_ents, tx->req_cons, 1443 tx->sring->req_prod, tx->sring->req_event, 1444 tx->sring->rsp_prod, tx->sring->rsp_event); 1445 } else { 1446 netif_rx_back_ring_t *rx = &netif->rx; 1447 sprintf(buf, "nr_ents=%x req_cons=%x" 1448 " req_prod=%x req_event=%x" 1449 " rsp_prod=%x rsp_event=%x", 1450 rx->nr_ents, rx->req_cons, 1451 rx->sring->req_prod, rx->sring->req_event, 1452 rx->sring->rsp_prod, rx->sring->rsp_event); 1453 } 1454 } 1455 return buf; 1456 } 1457 1458 static int 1459 vif_sysctl_handler(SYSCTL_HANDLER_ARGS) 1460 { 1461 device_t dev = (device_t)arg1; 1462 netif_t *netif = (netif_t *)device_get_ivars(dev); 1463 const char *value; 1464 char *buf = NULL; 1465 int err; 1466 1467 switch (arg2) { 1468 case VIF_SYSCTL_DOMID: 1469 return sysctl_handle_int(oidp, NULL, netif->domid, req); 1470 case VIF_SYSCTL_HANDLE: 1471 return sysctl_handle_int(oidp, NULL, netif->handle, req); 1472 case VIF_SYSCTL_TXRING: 1473 case VIF_SYSCTL_RXRING: 1474 value = buf = vif_sysctl_ring_info(netif, arg2); 1475 break; 1476 default: 1477 return (EINVAL); 1478 } 1479 1480 err = SYSCTL_OUT(req, value, strlen(value)); 1481 if (buf != NULL) 1482 free(buf, M_DEVBUF); 1483 1484 return err; 1485 } 1486 1487 /* Newbus vif device driver probe */ 1488 static int 1489 vif_probe(device_t dev) 1490 { 1491 DDPRINTF("vif%d\n", device_get_unit(dev)); 1492 return 0; 1493 } 1494 1495 /* Newbus vif device driver attach */ 1496 static int 1497 vif_attach(device_t dev) 1498 { 1499 netif_t *netif = (netif_t *)device_get_ivars(dev); 1500 uint8_t mac[ETHER_ADDR_LEN]; 1501 1502 DDPRINTF("%s\n", IFNAME(netif)); 1503 1504 SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), 1505 OID_AUTO, "domid", CTLTYPE_INT|CTLFLAG_RD, 1506 dev, VIF_SYSCTL_DOMID, vif_sysctl_handler, "I", 1507 "domid of frontend"); 1508 SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), 1509 OID_AUTO, "handle", CTLTYPE_INT|CTLFLAG_RD, 1510 dev, VIF_SYSCTL_HANDLE, vif_sysctl_handler, "I", 1511 "handle of frontend"); 1512 #ifdef XEN_NETBACK_DEBUG 1513 SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), 1514 OID_AUTO, "txring", CTLTYPE_STRING | CTLFLAG_RD, 1515 dev, VIF_SYSCTL_TXRING, vif_sysctl_handler, "A", 1516 "tx ring info"); 1517 SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), 1518 OID_AUTO, "rxring", CTLTYPE_STRING | CTLFLAG_RD, 1519 dev, VIF_SYSCTL_RXRING, vif_sysctl_handler, "A", 1520 "rx ring info"); 1521 #endif 1522 1523 memset(mac, 0xff, sizeof(mac)); 1524 mac[0] &= ~0x01; 1525 1526 ether_ifattach(netif->ifp, mac); 1527 netif->attached = 1; 1528 1529 connect(netif); 1530 1531 if (netif->bridge) { 1532 DPRINTF("Adding %s to bridge %s\n", IFNAME(netif), netif->bridge); 1533 int err = add_to_bridge(netif->ifp, netif->bridge); 1534 if (err) { 1535 WPRINTF("Error adding %s to %s; err=%d\n", 1536 IFNAME(netif), netif->bridge, err); 1537 } 1538 } 1539 1540 return bus_generic_attach(dev); 1541 } 1542 1543 /* Newbus vif device driver detach */ 1544 static int 1545 vif_detach(device_t dev) 1546 { 1547 netif_t *netif = (netif_t *)device_get_ivars(dev); 1548 struct ifnet *ifp = netif->ifp; 1549 1550 DDPRINTF("%s\n", IFNAME(netif)); 1551 1552 /* Tell the stack that the interface is no longer active */ 1553 ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); 1554 1555 ether_ifdetach(ifp); 1556 1557 bus_generic_detach(dev); 1558 1559 netif->attached = 0; 1560 1561 netif_put(netif); 1562 1563 return 0; 1564 } 1565 1566 static device_method_t vif_methods[] = { 1567 /* Device interface */ 1568 DEVMETHOD(device_probe, vif_probe), 1569 DEVMETHOD(device_attach, vif_attach), 1570 DEVMETHOD(device_detach, vif_detach), 1571 DEVMETHOD(device_shutdown, bus_generic_shutdown), 1572 DEVMETHOD(device_suspend, bus_generic_suspend), 1573 DEVMETHOD(device_resume, bus_generic_resume), 1574 {0, 0} 1575 }; 1576 1577 static devclass_t vif_devclass; 1578 1579 static driver_t vif_driver = { 1580 "vif", 1581 vif_methods, 1582 0, 1583 }; 1584 1585 DRIVER_MODULE(vif, nexus, vif_driver, vif_devclass, 0, 0); 1586 1587 1588 /* 1589 * Local variables: 1590 * mode: C 1591 * c-set-style: "BSD" 1592 * c-basic-offset: 4 1593 * tab-width: 4 1594 * indent-tabs-mode: t 1595 * End: 1596 */ 1597