xref: /freebsd/sys/dev/xen/netback/netback.c (revision bc093719ca478fe10b938cef32c30b528042cbcd)
1 /*
2  * Copyright (c) 2006, Cisco Systems, Inc.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  *
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  * 3. Neither the name of Cisco Systems, Inc. nor the names of its contributors
15  *    may be used to endorse or promote products derived from this software
16  *    without specific prior written permission.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
22  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28  * POSSIBILITY OF SUCH DAMAGE.
29  */
30 
31 #include <sys/cdefs.h>
32 __FBSDID("$FreeBSD$");
33 
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/sockio.h>
37 #include <sys/mbuf.h>
38 #include <sys/malloc.h>
39 #include <sys/kernel.h>
40 #include <sys/socket.h>
41 #include <sys/queue.h>
42 #include <sys/taskqueue.h>
43 
44 #include <sys/module.h>
45 #include <sys/bus.h>
46 #include <sys/sysctl.h>
47 
48 #include <net/if.h>
49 #include <net/if_arp.h>
50 #include <net/if_types.h>
51 #include <net/ethernet.h>
52 #include <net/if_bridgevar.h>
53 
54 #include <netinet/in_systm.h>
55 #include <netinet/in.h>
56 #include <netinet/in_var.h>
57 #include <netinet/ip.h>
58 #include <netinet/tcp.h>
59 #include <netinet/udp.h>
60 
61 #include <vm/vm_extern.h>
62 #include <vm/vm_kern.h>
63 
64 #include <machine/in_cksum.h>
65 #include <machine/xen-os.h>
66 #include <machine/hypervisor.h>
67 #include <machine/hypervisor-ifs.h>
68 #include <machine/xen_intr.h>
69 #include <machine/evtchn.h>
70 #include <machine/xenbus.h>
71 #include <machine/gnttab.h>
72 #include <machine/xen-public/memory.h>
73 #include <dev/xen/xenbus/xenbus_comms.h>
74 
75 
76 #ifdef XEN_NETBACK_DEBUG
77 #define DPRINTF(fmt, args...) \
78     printf("netback (%s:%d): " fmt, __FUNCTION__, __LINE__, ##args)
79 #else
80 #define DPRINTF(fmt, args...) ((void)0)
81 #endif
82 
83 #ifdef XEN_NETBACK_DEBUG_LOTS
84 #define DDPRINTF(fmt, args...) \
85     printf("netback (%s:%d): " fmt, __FUNCTION__, __LINE__, ##args)
86 #define DPRINTF_MBUF(_m) print_mbuf(_m, 0)
87 #define DPRINTF_MBUF_LEN(_m, _len) print_mbuf(_m, _len)
88 #else
89 #define DDPRINTF(fmt, args...) ((void)0)
90 #define DPRINTF_MBUF(_m) ((void)0)
91 #define DPRINTF_MBUF_LEN(_m, _len) ((void)0)
92 #endif
93 
94 #define WPRINTF(fmt, args...) \
95     printf("netback (%s:%d): " fmt, __FUNCTION__, __LINE__, ##args)
96 
97 #define ARRAY_SIZE(x) (sizeof(x)/sizeof((x)[0]))
98 #define BUG_ON PANIC_IF
99 
100 #define IFNAME(_np) (_np)->ifp->if_xname
101 
102 #define NET_TX_RING_SIZE __RING_SIZE((netif_tx_sring_t *)0, PAGE_SIZE)
103 #define NET_RX_RING_SIZE __RING_SIZE((netif_rx_sring_t *)0, PAGE_SIZE)
104 
105 struct ring_ref {
106 	vm_offset_t va;
107 	grant_handle_t handle;
108 	uint64_t bus_addr;
109 };
110 
111 typedef struct netback_info {
112 
113 	/* Schedule lists */
114 	STAILQ_ENTRY(netback_info) next_tx;
115 	STAILQ_ENTRY(netback_info) next_rx;
116 	int on_tx_sched_list;
117 	int on_rx_sched_list;
118 
119 	struct xenbus_device *xdev;
120 	XenbusState frontend_state;
121 
122 	domid_t domid;
123 	int handle;
124 	char *bridge;
125 
126 	int rings_connected;
127 	struct ring_ref tx_ring_ref;
128 	struct ring_ref rx_ring_ref;
129 	netif_tx_back_ring_t tx;
130 	netif_rx_back_ring_t rx;
131 	evtchn_port_t evtchn;
132 	int irq;
133 	void *irq_cookie;
134 
135 	struct ifnet *ifp;
136 	int ref_cnt;
137 
138 	device_t ndev;
139 	int attached;
140 } netif_t;
141 
142 
143 #define MAX_PENDING_REQS 256
144 #define PKT_PROT_LEN 64
145 
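/*
 * Bookkeeping for transmit requests that have been mapped from a frontend
 * but not yet completed.  pending_ring[] holds the indices of free slots;
 * pending_prod and pending_cons are free-running counters, so
 * NR_PENDING_REQS below is the number of slots currently in use.
 */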
146 static struct {
147 	netif_tx_request_t req;
148 	netif_t *netif;
149 } pending_tx_info[MAX_PENDING_REQS];
150 static uint16_t pending_ring[MAX_PENDING_REQS];
151 typedef unsigned int PEND_RING_IDX;
152 #define MASK_PEND_IDX(_i) ((_i)&(MAX_PENDING_REQS-1))
153 static PEND_RING_IDX pending_prod, pending_cons;
154 #define NR_PENDING_REQS (MAX_PENDING_REQS - pending_prod + pending_cons)
155 
156 static unsigned long mmap_vstart;
157 #define MMAP_VADDR(_req) (mmap_vstart + ((_req) * PAGE_SIZE))
158 
159 /* Freed TX mbufs get batched on this ring before return to pending_ring. */
160 static uint16_t dealloc_ring[MAX_PENDING_REQS];
161 static PEND_RING_IDX dealloc_prod, dealloc_cons;
162 
163 static multicall_entry_t rx_mcl[NET_RX_RING_SIZE+1];
164 static mmu_update_t rx_mmu[NET_RX_RING_SIZE];
165 static gnttab_transfer_t grant_rx_op[NET_RX_RING_SIZE];
166 
167 static grant_handle_t grant_tx_handle[MAX_PENDING_REQS];
168 static gnttab_unmap_grant_ref_t tx_unmap_ops[MAX_PENDING_REQS];
169 static gnttab_map_grant_ref_t tx_map_ops[MAX_PENDING_REQS];
170 
171 static struct task net_tx_task, net_rx_task;
172 static struct callout rx_task_callout;
173 
174 static STAILQ_HEAD(netback_tx_sched_list, netback_info) tx_sched_list =
175 	STAILQ_HEAD_INITIALIZER(tx_sched_list);
176 static STAILQ_HEAD(netback_rx_sched_list, netback_info) rx_sched_list =
177 	STAILQ_HEAD_INITIALIZER(rx_sched_list);
178 static struct mtx tx_sched_list_lock;
179 static struct mtx rx_sched_list_lock;
180 
181 static int vif_unit_maker = 0;
182 
183 /* Protos */
184 static void netback_start(struct ifnet *ifp);
185 static int netback_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data);
186 static int vif_add_dev(struct xenbus_device *xdev);
187 static void disconnect_rings(netif_t *netif);
188 
189 #ifdef XEN_NETBACK_DEBUG_LOTS
190 /* Debug code to display the contents of an mbuf */
191 static void
192 print_mbuf(struct mbuf *m, int max)
193 {
194 	int i, j=0;
195 	printf("mbuf %p len = %d", m, m->m_pkthdr.len);
196 	for (; m; m = m->m_next) {
197 		unsigned char *d = m->m_data;
198 		for (i=0; i < m->m_len; i++) {
199 			if (max && j == max)
200 				break;
201 			if ((j++ % 16) == 0)
202 				printf("\n%04x:", j - 1);
203 			printf(" %02x", d[i]);
204 		}
205 	}
206 	printf("\n");
207 }
208 #endif
209 
210 
211 #define MAX_MFN_ALLOC 64
212 static unsigned long mfn_list[MAX_MFN_ALLOC];
213 static unsigned int alloc_index = 0;
214 
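/*
 * Hand out a machine frame from a small local cache, refilling the cache
 * from the hypervisor (XENMEM_increase_reservation) when it runs dry.
 * Returns 0 if no frame could be obtained.
 */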
215 static unsigned long
216 alloc_mfn(void)
217 {
218 	unsigned long mfn = 0;
219 	struct xen_memory_reservation reservation = {
220 		.extent_start = mfn_list,
221 		.nr_extents   = MAX_MFN_ALLOC,
222 		.extent_order = 0,
223 		.domid        = DOMID_SELF
224 	};
225 	if ( unlikely(alloc_index == 0) )
226 		alloc_index = HYPERVISOR_memory_op(
227 			XENMEM_increase_reservation, &reservation);
228 	if ( alloc_index != 0 )
229 		mfn = mfn_list[--alloc_index];
230 	return mfn;
231 }
232 
233 static unsigned long
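/*
 * Allocate nr_pages worth of kernel virtual address space and give the
 * backing machine pages back to Xen (update_va_mapping plus
 * XENMEM_decrease_reservation, batched 16 pages at a time).  The resulting
 * empty VA range is later used to map pages granted by frontends.
 * Returns the start address, or 0 on failure.
 */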
234 alloc_empty_page_range(unsigned long nr_pages)
235 {
236 	void *pages;
237 	int i = 0, j = 0;
238 	multicall_entry_t mcl[17];
239 	unsigned long mfn_list[16];
240 	struct xen_memory_reservation reservation = {
241 		.extent_start = mfn_list,
242 		.nr_extents   = 0,
243 		.address_bits = 0,
244 		.extent_order = 0,
245 		.domid        = DOMID_SELF
246 	};
247 
248 	pages = malloc(nr_pages*PAGE_SIZE, M_DEVBUF, M_NOWAIT);
249 	if (pages == NULL)
250 		return 0;
251 
252 	memset(mcl, 0, sizeof(mcl));
253 
254 	while (i < nr_pages) {
255 		unsigned long va = (unsigned long)pages + (i++ * PAGE_SIZE);
256 
257 		mcl[j].op = __HYPERVISOR_update_va_mapping;
258 		mcl[j].args[0] = va;
259 
260 		mfn_list[j++] = vtomach(va) >> PAGE_SHIFT;
261 
262 		xen_phys_machine[(vtophys(va) >> PAGE_SHIFT)] = INVALID_P2M_ENTRY;
263 
264 		if (j == 16 || i == nr_pages) {
265 			mcl[j-1].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_LOCAL;
266 
267 			reservation.nr_extents = j;
268 
269 			mcl[j].op = __HYPERVISOR_memory_op;
270 			mcl[j].args[0] = XENMEM_decrease_reservation;
271 			mcl[j].args[1] =  (unsigned long)&reservation;
272 
273 			(void)HYPERVISOR_multicall(mcl, j+1);
274 
275 			mcl[j-1].args[MULTI_UVMFLAGS_INDEX] = 0;
276 			j = 0;
277 		}
278 	}
279 
280 	return (unsigned long)pages;
281 }
282 
283 #ifdef XEN_NETBACK_FIXUP_CSUM
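/*
 * Finish a TCP or UDP checksum in software for a packet that was handed
 * to us with the checksum left blank, e.g. when traffic is bridged from
 * one domain to another without passing through real hardware.
 */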
284 static void
285 fixup_checksum(struct mbuf *m)
286 {
287 	struct ether_header *eh = mtod(m, struct ether_header *);
288 	struct ip *ip = (struct ip *)(eh + 1);
289 	int iphlen = ip->ip_hl << 2;
290 	int iplen = ntohs(ip->ip_len);
291 
292 	if ((m->m_pkthdr.csum_flags & CSUM_TCP)) {
293 		struct tcphdr *th = (struct tcphdr *)((caddr_t)ip + iphlen);
294 		th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
295 			htons(IPPROTO_TCP + (iplen - iphlen)));
296 		th->th_sum = in_cksum_skip(m, iplen + sizeof(*eh), sizeof(*eh) + iphlen);
297 		m->m_pkthdr.csum_flags &= ~CSUM_TCP;
298 	} else {
299 		u_short csum;
300 		struct udphdr *uh = (struct udphdr *)((caddr_t)ip + iphlen);
301 		uh->uh_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
302 			htons(IPPROTO_UDP + (iplen - iphlen)));
303 		if ((csum = in_cksum_skip(m, iplen + sizeof(*eh), sizeof(*eh) + iphlen)) == 0)
304 			csum = 0xffff;
305 		uh->uh_sum = csum;
306 		m->m_pkthdr.csum_flags &= ~CSUM_UDP;
307 	}
308 }
309 #endif
310 
311 /* Add the interface to the specified bridge */
312 static int
313 add_to_bridge(struct ifnet *ifp, char *bridge)
314 {
315 	struct ifdrv ifd;
316 	struct ifbreq ifb;
317 	struct ifnet *ifp_bridge = ifunit(bridge);
318 
319 	if (!ifp_bridge)
320 		return ENOENT;
321 
322 	bzero(&ifd, sizeof(ifd));
323 	bzero(&ifb, sizeof(ifb));
324 
325 	strcpy(ifb.ifbr_ifsname, ifp->if_xname);
326 	strcpy(ifd.ifd_name, ifp->if_xname);
327 	ifd.ifd_cmd = BRDGADD;
328 	ifd.ifd_len = sizeof(ifb);
329 	ifd.ifd_data = &ifb;
330 
331 	return bridge_ioctl_kern(ifp_bridge, SIOCSDRVSPEC, &ifd);
332 
333 }
334 
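/*
 * Allocate the per-interface netif_t and its ifnet.  The interface is
 * named vif<unit> from a global unit counter; ether_ifattach() is not
 * called here, that happens later in vif_attach() once the newbus device
 * for the vif has been created.
 */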
335 static int
336 netif_create(int handle, struct xenbus_device *xdev, char *bridge)
337 {
338 	netif_t *netif;
339 	struct ifnet *ifp;
340 
341 	netif = (netif_t *)malloc(sizeof(*netif), M_DEVBUF, M_NOWAIT | M_ZERO);
342 	if (!netif)
343 		return ENOMEM;
344 
345 	netif->ref_cnt = 1;
346 	netif->handle = handle;
347 	netif->domid = xdev->otherend_id;
348 	netif->xdev = xdev;
349 	netif->bridge = bridge;
350 	xdev->data = netif;
351 
352 	/* Set up ifnet structure */
353 	ifp = netif->ifp = if_alloc(IFT_ETHER);
354 	if (!ifp) {
355 		if (bridge)
356 			free(bridge, M_DEVBUF);
357 		free(netif, M_DEVBUF);
358 		return ENOMEM;
359 	}
360 
361 	ifp->if_softc = netif;
362 	if_initname(ifp, "vif",
363 		atomic_fetchadd_int(&vif_unit_maker, 1) /* ifno */ );
364 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX;
365 	ifp->if_output = ether_output;
366 	ifp->if_start = netback_start;
367 	ifp->if_ioctl = netback_ioctl;
368 	ifp->if_mtu = ETHERMTU;
369 	ifp->if_snd.ifq_maxlen = NET_TX_RING_SIZE - 1;
370 
371 	DPRINTF("Created %s for domid=%d handle=%d\n", IFNAME(netif), netif->domid, netif->handle);
372 
373 	return 0;
374 }
375 
376 static void
377 netif_get(netif_t *netif)
378 {
379 	atomic_add_int(&netif->ref_cnt, 1);
380 }
381 
382 static void
383 netif_put(netif_t *netif)
384 {
385 	if (atomic_fetchadd_int(&netif->ref_cnt, -1) == 1) {
386 		DPRINTF("%s\n", IFNAME(netif));
387 		disconnect_rings(netif);
388 		if (netif->ifp) {
389 			if_free(netif->ifp);
390 			netif->ifp = NULL;
391 		}
392 		if (netif->bridge)
393 			free(netif->bridge, M_DEVBUF);
394 		free(netif, M_DEVBUF);
395 	}
396 }
397 
398 static int
399 netback_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
400 {
401 	switch (cmd) {
402 	case SIOCSIFFLAGS:
403 		DDPRINTF("%s cmd=SIOCSIFFLAGS flags=%x\n",
404 			IFNAME((struct netback_info *)ifp->if_softc), ((struct ifreq *)data)->ifr_flags);
405 		return 0;
406 	}
407 
408 	DDPRINTF("%s cmd=%lx\n", IFNAME((struct netback_info *)ifp->if_softc), cmd);
409 
410 	return ether_ioctl(ifp, cmd, data);
411 }
412 
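/*
 * Kick the TX task, but only if at least half of the pending-request
 * slots are free and some interface is waiting on the schedule list.
 */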
413 static inline void
414 maybe_schedule_tx_action(void)
415 {
416 	smp_mb();
417 	if ((NR_PENDING_REQS < (MAX_PENDING_REQS/2)) && !STAILQ_EMPTY(&tx_sched_list))
418 		taskqueue_enqueue(taskqueue_swi, &net_tx_task);
419 }
420 
421 /* Removes netif from front of list and does not call netif_put() (caller must) */
422 static netif_t *
423 remove_from_tx_schedule_list(void)
424 {
425 	netif_t *netif;
426 
427 	mtx_lock(&tx_sched_list_lock);
428 
429 	if ((netif = STAILQ_FIRST(&tx_sched_list))) {
430 		STAILQ_REMOVE(&tx_sched_list, netif, netback_info, next_tx);
431 		STAILQ_NEXT(netif, next_tx) = NULL;
432 		netif->on_tx_sched_list = 0;
433 	}
434 
435 	mtx_unlock(&tx_sched_list_lock);
436 
437 	return netif;
438 }
439 
440 /* Adds netif to end of list and calls netif_get() */
441 static void
442 add_to_tx_schedule_list_tail(netif_t *netif)
443 {
444 	if (netif->on_tx_sched_list)
445 		return;
446 
447 	mtx_lock(&tx_sched_list_lock);
448 	if (!netif->on_tx_sched_list && (netif->ifp->if_drv_flags & IFF_DRV_RUNNING)) {
449 		netif_get(netif);
450 		STAILQ_INSERT_TAIL(&tx_sched_list, netif, next_tx);
451 		netif->on_tx_sched_list = 1;
452 	}
453 	mtx_unlock(&tx_sched_list_lock);
454 }
455 
456 /*
457  * Note on CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER:
458  * If this driver is pipelining transmit requests then we can be very
459  * aggressive in avoiding new-packet notifications -- frontend only needs to
460  * send a notification if there are no outstanding unreceived responses.
461  * If we may be buffering transmit requests for any reason then we must be rather
462  * more conservative and treat this as the final check for pending work.
463  */
464 static void
465 netif_schedule_tx_work(netif_t *netif)
466 {
467 	int more_to_do;
468 
469 #ifdef CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER
470 	more_to_do = RING_HAS_UNCONSUMED_REQUESTS(&netif->tx);
471 #else
472 	RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, more_to_do);
473 #endif
474 
475 	if (more_to_do) {
476 		DDPRINTF("Adding %s to tx sched list\n", IFNAME(netif));
477 		add_to_tx_schedule_list_tail(netif);
478 		maybe_schedule_tx_action();
479 	}
480 }
481 
482 static struct mtx dealloc_lock;
483 MTX_SYSINIT(netback_dealloc, &dealloc_lock, "DEALLOC LOCK", MTX_SPIN | MTX_NOWITNESS);
484 
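/*
 * Mark a pending TX slot as finished.  The slot is queued on the dealloc
 * ring; the grant unmap and the response to the frontend happen later in
 * net_tx_action_dealloc(), in task context.
 */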
485 static void
486 netif_idx_release(uint16_t pending_idx)
487 {
488 	mtx_lock_spin(&dealloc_lock);
489 	dealloc_ring[MASK_PEND_IDX(dealloc_prod++)] = pending_idx;
490 	mtx_unlock_spin(&dealloc_lock);
491 
492 	taskqueue_enqueue(taskqueue_swi, &net_tx_task);
493 }
494 
495 static void
496 make_tx_response(netif_t *netif,
497 				 uint16_t    id,
498 				 int8_t      st)
499 {
500 	RING_IDX i = netif->tx.rsp_prod_pvt;
501 	netif_tx_response_t *resp;
502 	int notify;
503 
504 	resp = RING_GET_RESPONSE(&netif->tx, i);
505 	resp->id     = id;
506 	resp->status = st;
507 
508 	netif->tx.rsp_prod_pvt = ++i;
509 	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->tx, notify);
510 	if (notify)
511 		notify_remote_via_irq(netif->irq);
512 
513 #ifdef CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER
514 	if (i == netif->tx.req_cons) {
515 		int more_to_do;
516 		RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, more_to_do);
517 		if (more_to_do)
518 			add_to_tx_schedule_list_tail(netif);
519 	}
520 #endif
521 }
522 
523 inline static void
524 net_tx_action_dealloc(void)
525 {
526 	gnttab_unmap_grant_ref_t *gop;
527 	uint16_t pending_idx;
528 	PEND_RING_IDX dc, dp;
529 	netif_t *netif;
530 	int ret;
531 
532 	dc = dealloc_cons;
533 	dp = dealloc_prod;
534 
535 	/*
536 	 * Free up any grants we have finished using
537 	 */
538 	gop = tx_unmap_ops;
539 	while (dc != dp) {
540 		pending_idx = dealloc_ring[MASK_PEND_IDX(dc++)];
541 		gop->host_addr    = MMAP_VADDR(pending_idx);
542 		gop->dev_bus_addr = 0;
543 		gop->handle       = grant_tx_handle[pending_idx];
544 		gop++;
545 	}
546 	ret = HYPERVISOR_grant_table_op(
547 		GNTTABOP_unmap_grant_ref, tx_unmap_ops, gop - tx_unmap_ops);
548 	BUG_ON(ret);
549 
550 	while (dealloc_cons != dp) {
551 		pending_idx = dealloc_ring[MASK_PEND_IDX(dealloc_cons++)];
552 
553 		netif = pending_tx_info[pending_idx].netif;
554 
555 		make_tx_response(netif, pending_tx_info[pending_idx].req.id,
556 				 NETIF_RSP_OKAY);
557 
558 		pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
559 
560 		netif_put(netif);
561 	}
562 }
563 
564 static void
565 netif_page_release(void *buf, void *args)
566 {
567 	uint16_t pending_idx = (unsigned int)args;
568 
569 	DDPRINTF("pending_idx=%u\n", pending_idx);
570 
571 	KASSERT(pending_idx < MAX_PENDING_REQS, ("%s: bad index %u", __func__, pending_idx));
572 
573 	netif_idx_release(pending_idx);
574 }
575 
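/*
 * Task handler for guest-to-host traffic: retire any finished pending
 * slots, pull TX requests from each scheduled interface, batch the grant
 * map operations for the frontend pages, and finally copy the data into
 * mbufs and hand them to the stack via if_input().
 */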
576 static void
577 net_tx_action(void *context, int pending)
578 {
579 	struct mbuf *m;
580 	netif_t *netif;
581 	netif_tx_request_t txreq;
582 	uint16_t pending_idx;
583 	RING_IDX i;
584 	gnttab_map_grant_ref_t *mop;
585 	int ret, work_to_do;
586 	struct mbuf *txq = NULL, *txq_last = NULL;
587 
588 	if (dealloc_cons != dealloc_prod)
589 		net_tx_action_dealloc();
590 
591 	mop = tx_map_ops;
592 	while ((NR_PENDING_REQS < MAX_PENDING_REQS) && !STAILQ_EMPTY(&tx_sched_list)) {
593 
594 		/* Get a netif from the list with work to do. */
595 		netif = remove_from_tx_schedule_list();
596 
597 		DDPRINTF("Processing %s (prod=%u, cons=%u)\n",
598 				IFNAME(netif), netif->tx.sring->req_prod, netif->tx.req_cons);
599 
600 		RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, work_to_do);
601 		if (!work_to_do) {
602 			netif_put(netif);
603 			continue;
604 		}
605 
606 		i = netif->tx.req_cons;
607 		rmb(); /* Ensure that we see the request before we copy it. */
608 		memcpy(&txreq, RING_GET_REQUEST(&netif->tx, i), sizeof(txreq));
609 
610 		/* If we want credit-based scheduling, could add it here - WORK */
611 
612 		netif->tx.req_cons++;
613 
614 		netif_schedule_tx_work(netif);
615 
616 		if (unlikely(txreq.size < ETHER_HDR_LEN) ||
617 		    unlikely(txreq.size > (ETHER_MAX_LEN-ETHER_CRC_LEN))) {
618 			WPRINTF("Bad packet size: %d\n", txreq.size);
619 			make_tx_response(netif, txreq.id, NETIF_RSP_ERROR);
620 			netif_put(netif);
621 			continue;
622 		}
623 
624 		/* No crossing a page as the payload mustn't fragment. */
625 		if (unlikely((txreq.offset + txreq.size) >= PAGE_SIZE)) {
626 			WPRINTF("txreq.offset: %x, size: %u, end: %u\n",
627 				txreq.offset, txreq.size,
628 				(txreq.offset & PAGE_MASK) + txreq.size);
629 			make_tx_response(netif, txreq.id, NETIF_RSP_ERROR);
630 			netif_put(netif);
631 			continue;
632 		}
633 
634 		pending_idx = pending_ring[MASK_PEND_IDX(pending_cons)];
635 
636 		MGETHDR(m, M_DONTWAIT, MT_DATA);
637 		if (!m) {
638 			WPRINTF("Failed to allocate mbuf\n");
639 			make_tx_response(netif, txreq.id, NETIF_RSP_ERROR);
640 			netif_put(netif);
641 			break;
642 		}
643 		m->m_pkthdr.rcvif = netif->ifp;
644 
645 		if ((m->m_pkthdr.len = txreq.size) > PKT_PROT_LEN) {
646 			struct mbuf *n;
647 			MGET(n, M_DONTWAIT, MT_DATA);
648 			if (!(m->m_next = n)) {
649 				m_freem(m);
650 				WPRINTF("Failed to allocate second mbuf\n");
651 				make_tx_response(netif, txreq.id, NETIF_RSP_ERROR);
652 				netif_put(netif);
653 				break;
654 			}
655 			n->m_len = txreq.size - PKT_PROT_LEN;
656 			m->m_len = PKT_PROT_LEN;
657 		} else
658 			m->m_len = txreq.size;
659 
660 		mop->host_addr = MMAP_VADDR(pending_idx);
661 		mop->dom       = netif->domid;
662 		mop->ref       = txreq.gref;
663 		mop->flags     = GNTMAP_host_map | GNTMAP_readonly;
664 		mop++;
665 
666 		memcpy(&pending_tx_info[pending_idx].req,
667 		       &txreq, sizeof(txreq));
668 		pending_tx_info[pending_idx].netif = netif;
669 		*((uint16_t *)m->m_data) = pending_idx;
670 
671 		if (txq_last)
672 			txq_last->m_nextpkt = m;
673 		else
674 			txq = m;
675 		txq_last = m;
676 
677 		pending_cons++;
678 
679 		if ((mop - tx_map_ops) >= ARRAY_SIZE(tx_map_ops))
680 			break;
681 	}
682 
683 	if (!txq)
684 		return;
685 
686 	ret = HYPERVISOR_grant_table_op(
687 		GNTTABOP_map_grant_ref, tx_map_ops, mop - tx_map_ops);
688 	BUG_ON(ret);
689 
690 	mop = tx_map_ops;
691 	while ((m = txq) != NULL) {
692 		caddr_t data;
693 
694 		txq = m->m_nextpkt;
695 		m->m_nextpkt = NULL;
696 
697 		pending_idx = *((uint16_t *)m->m_data);
698 		netif       = pending_tx_info[pending_idx].netif;
699 		memcpy(&txreq, &pending_tx_info[pending_idx].req, sizeof(txreq));
700 
701 		/* Check the remap error code. */
702 		if (unlikely(mop->status)) {
703 			WPRINTF("#### netback grant fails\n");
704 			make_tx_response(netif, txreq.id, NETIF_RSP_ERROR);
705 			netif_put(netif);
706 			m_freem(m);
707 			mop++;
708 			pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
709 			continue;
710 		}
711 
712 #if 0
713 		/* Can't do this in FreeBSD since vtophys() returns the pfn */
714 		/* of the remote domain who loaned us the machine page - DPT */
715 		xen_phys_machine[(vtophys(MMAP_VADDR(pending_idx)) >> PAGE_SHIFT)] =
716 			mop->dev_bus_addr >> PAGE_SHIFT;
717 #endif
718 		grant_tx_handle[pending_idx] = mop->handle;
719 
720 		/* Setup data in mbuf (lengths are already set) */
721 		data = (caddr_t)(MMAP_VADDR(pending_idx)|txreq.offset);
722 		bcopy(data, m->m_data, m->m_len);
723 		if (m->m_next) {
724 			struct mbuf *n = m->m_next;
725 			MEXTADD(n, MMAP_VADDR(pending_idx), PAGE_SIZE, netif_page_release,
726 				(void *)(unsigned int)pending_idx, M_RDONLY, EXT_NET_DRV);
727 			n->m_data = &data[PKT_PROT_LEN];
728 		} else {
729 			/* Schedule a response immediately. */
730 			netif_idx_release(pending_idx);
731 		}
732 
733 		if ((txreq.flags & NETTXF_data_validated)) {
734 			/* Tell the stack the checksums are okay */
735 			m->m_pkthdr.csum_flags |=
736 				(CSUM_IP_CHECKED | CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
737 			m->m_pkthdr.csum_data = 0xffff;
738 		}
739 
740 		/* The frontend left the checksum blank; mark the mbuf so the checksum is computed if the packet is forwarded */
741 		if ((txreq.flags & NETTXF_csum_blank)) {
742 			struct ether_header *eh = mtod(m, struct ether_header *);
743 			if (ntohs(eh->ether_type) == ETHERTYPE_IP) {
744 				struct ip *ip = (struct ip *)&m->m_data[14];
745 				if (ip->ip_p == IPPROTO_TCP)
746 					m->m_pkthdr.csum_flags |= CSUM_TCP;
747 				else if (ip->ip_p == IPPROTO_UDP)
748 					m->m_pkthdr.csum_flags |= CSUM_UDP;
749 			}
750 		}
751 
752 		netif->ifp->if_ibytes += m->m_pkthdr.len;
753 		netif->ifp->if_ipackets++;
754 
755 		DDPRINTF("RECV %d bytes from %s (cflags=%x)\n",
756 			m->m_pkthdr.len, IFNAME(netif), m->m_pkthdr.csum_flags);
757 		DPRINTF_MBUF_LEN(m, 128);
758 
759 		(*netif->ifp->if_input)(netif->ifp, m);
760 
761 		mop++;
762 	}
763 }
764 
765 /* Handle interrupt from a frontend */
766 static void
767 netback_intr(void *arg)
768 {
769 	netif_t *netif = arg;
770 	DDPRINTF("%s\n", IFNAME(netif));
771 	add_to_tx_schedule_list_tail(netif);
772 	maybe_schedule_tx_action();
773 }
774 
775 /* Removes netif from front of list and does not call netif_put() (caller must) */
776 static netif_t *
777 remove_from_rx_schedule_list(void)
778 {
779 	netif_t *netif;
780 
781 	mtx_lock(&rx_sched_list_lock);
782 
783 	if ((netif = STAILQ_FIRST(&rx_sched_list))) {
784 		STAILQ_REMOVE(&rx_sched_list, netif, netback_info, next_rx);
785 		STAILQ_NEXT(netif, next_rx) = NULL;
786 		netif->on_rx_sched_list = 0;
787 	}
788 
789 	mtx_unlock(&rx_sched_list_lock);
790 
791 	return netif;
792 }
793 
794 /* Adds netif to end of list and calls netif_get() */
795 static void
796 add_to_rx_schedule_list_tail(netif_t *netif)
797 {
798 	if (netif->on_rx_sched_list)
799 		return;
800 
801 	mtx_lock(&rx_sched_list_lock);
802 	if (!netif->on_rx_sched_list && (netif->ifp->if_drv_flags & IFF_DRV_RUNNING)) {
803 		netif_get(netif);
804 		STAILQ_INSERT_TAIL(&rx_sched_list, netif, next_rx);
805 		netif->on_rx_sched_list = 1;
806 	}
807 	mtx_unlock(&rx_sched_list_lock);
808 }
809 
810 static int
811 make_rx_response(netif_t *netif, uint16_t id, int8_t st,
812 				 uint16_t offset, uint16_t size, uint16_t flags)
813 {
814 	RING_IDX i = netif->rx.rsp_prod_pvt;
815 	netif_rx_response_t *resp;
816 	int notify;
817 
818 	resp = RING_GET_RESPONSE(&netif->rx, i);
819 	resp->offset     = offset;
820 	resp->flags      = flags;
821 	resp->id         = id;
822 	resp->status     = (int16_t)size;
823 	if (st < 0)
824 		resp->status = (int16_t)st;
825 
826 	DDPRINTF("rx resp(%d): off=%x fl=%x id=%x stat=%d\n",
827 		i, resp->offset, resp->flags, resp->id, resp->status);
828 
829 	netif->rx.rsp_prod_pvt = ++i;
830 	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->rx, notify);
831 
832 	return notify;
833 }
834 
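/*
 * Push packets queued on the interface's send queue to the frontend.
 * Each packet's data page is handed over with GNTTABOP_transfer and the
 * local mapping is repointed at a replacement frame from alloc_mfn().
 * Returns the number of packets dequeued.
 */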
835 static int
836 netif_rx(netif_t *netif)
837 {
838 	struct ifnet *ifp = netif->ifp;
839 	struct mbuf *m;
840 	multicall_entry_t *mcl;
841 	mmu_update_t *mmu;
842 	gnttab_transfer_t *gop;
843 	unsigned long vdata, old_mfn, new_mfn;
844 	struct mbuf *rxq = NULL, *rxq_last = NULL;
845 	int ret, notify = 0, pkts_dequeued = 0;
846 
847 	DDPRINTF("%s\n", IFNAME(netif));
848 
849 	mcl = rx_mcl;
850 	mmu = rx_mmu;
851 	gop = grant_rx_op;
852 
853 	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
854 
855 		/* Quit if the target domain has no receive buffers */
856 		if (netif->rx.req_cons == netif->rx.sring->req_prod)
857 			break;
858 
859 		IFQ_DRV_DEQUEUE(&ifp->if_snd, m);
860 		if (m == NULL)
861 			break;
862 
863 		pkts_dequeued++;
864 
865 		/* Check if we need to copy the data */
866 		if (((m->m_flags & (M_RDONLY|M_EXT)) != M_EXT) ||
867 			(*m->m_ext.ref_cnt > 1) || m->m_next != NULL) {
868 			struct mbuf *n;
869 
870 			DDPRINTF("copying mbuf (fl=%x ext=%x rc=%d n=%x)\n",
871 				m->m_flags,
872 				(m->m_flags & M_EXT) ? m->m_ext.ext_type : 0,
873 				(m->m_flags & M_EXT) ? *m->m_ext.ref_cnt : 0,
874 				(unsigned int)m->m_next);
875 
876 			/* Make copy */
877 			MGETHDR(n, M_DONTWAIT, MT_DATA);
878 			if (!n)
879 				goto drop;
880 
881 			MCLGET(n, M_DONTWAIT);
882 			if (!(n->m_flags & M_EXT)) {
883 				m_freem(n);
884 				goto drop;
885 			}
886 
887 			/* Leave space at front and keep current alignment */
888 			n->m_data += 16 + ((unsigned int)m->m_data & 0x3);
889 
890 			if (m->m_pkthdr.len > M_TRAILINGSPACE(n)) {
891 				WPRINTF("pkt too big %d\n", m->m_pkthdr.len);
892 				m_freem(n);
893 				goto drop;
894 			}
895 			m_copydata(m, 0, m->m_pkthdr.len, n->m_data);
896 			n->m_pkthdr.len = n->m_len = m->m_pkthdr.len;
897 			n->m_pkthdr.csum_flags = (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA);
898 			m_freem(m);
899 			m = n;
900 		}
901 
902 		vdata = (unsigned long)m->m_data;
903 		old_mfn = vtomach(vdata) >> PAGE_SHIFT;
904 
905 		if ((new_mfn = alloc_mfn()) == 0)
906 			goto drop;
907 
908 #ifdef XEN_NETBACK_FIXUP_CSUM
909 		/* Check if we need to compute a checksum.  This happens */
910 		/* when bridging from one domain to another. */
911 		if ((m->m_pkthdr.csum_flags & CSUM_DELAY_DATA))
912 			fixup_checksum(m);
913 #endif
914 
915 		xen_phys_machine[(vtophys(vdata) >> PAGE_SHIFT)] = new_mfn;
916 
917 		mcl->op = __HYPERVISOR_update_va_mapping;
918 		mcl->args[0] = vdata;
919 		mcl->args[1] = (new_mfn << PAGE_SHIFT) | PG_V | PG_RW | PG_M | PG_A;
920 		mcl->args[2] = 0;
921 		mcl->args[3] = 0;
922 		mcl++;
923 
924 		gop->mfn = old_mfn;
925 		gop->domid = netif->domid;
926 		gop->ref = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons)->gref;
927 		netif->rx.req_cons++;
928 		gop++;
929 
930 		mmu->ptr = (new_mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE;
931 		mmu->val = vtophys(vdata) >> PAGE_SHIFT;
932 		mmu++;
933 
934 		if (rxq_last)
935 			rxq_last->m_nextpkt = m;
936 		else
937 			rxq = m;
938 		rxq_last = m;
939 
940 		DDPRINTF("XMIT %d bytes to %s\n", m->m_pkthdr.len, IFNAME(netif));
941 		DPRINTF_MBUF_LEN(m, 128);
942 
943 		/* Filled the batch queue? */
944 		if ((gop - grant_rx_op) == ARRAY_SIZE(grant_rx_op))
945 			break;
946 
947 		continue;
948 	drop:
949 		DDPRINTF("dropping pkt\n");
950 		ifp->if_oerrors++;
951 		m_freem(m);
952 	}
953 
954 	if (mcl == rx_mcl)
955 		return pkts_dequeued;
956 
957 	mcl->op = __HYPERVISOR_mmu_update;
958 	mcl->args[0] = (unsigned long)rx_mmu;
959 	mcl->args[1] = mmu - rx_mmu;
960 	mcl->args[2] = 0;
961 	mcl->args[3] = DOMID_SELF;
962 	mcl++;
963 
964 	mcl[-2].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_ALL;
965 	ret = HYPERVISOR_multicall(rx_mcl, mcl - rx_mcl);
966 	BUG_ON(ret != 0);
967 
968 	ret = HYPERVISOR_grant_table_op(GNTTABOP_transfer, grant_rx_op, gop - grant_rx_op);
969 	BUG_ON(ret != 0);
970 
971 	mcl = rx_mcl;
972 	gop = grant_rx_op;
973 
974 	while ((m = rxq) != NULL) {
975 		int8_t status;
976 		uint16_t id, flags = 0;
977 
978 		rxq = m->m_nextpkt;
979 		m->m_nextpkt = NULL;
980 
981 		/* Rederive the machine addresses. */
982 		new_mfn = mcl->args[1] >> PAGE_SHIFT;
983 		old_mfn = gop->mfn;
984 
985 		ifp->if_obytes += m->m_pkthdr.len;
986 		ifp->if_opackets++;
987 
988 		/* The update_va_mapping() must not fail. */
989 		BUG_ON(mcl->result != 0);
990 
991 		/* Setup flags */
992 		if ((m->m_pkthdr.csum_flags & CSUM_DELAY_DATA))
993 			flags |= NETRXF_csum_blank | NETRXF_data_validated;
994 		else if ((m->m_pkthdr.csum_flags & CSUM_DATA_VALID))
995 			flags |= NETRXF_data_validated;
996 
997 		/* Check the reassignment error code. */
998 		status = NETIF_RSP_OKAY;
999 		if (gop->status != 0) {
1000 			DPRINTF("Bad status %d from grant transfer to DOM%u\n",
1001 				gop->status, netif->domid);
1002 			/*
1003 			 * Page no longer belongs to us unless GNTST_bad_page,
1004 			 * but that should be a fatal error anyway.
1005 			 */
1006 			BUG_ON(gop->status == GNTST_bad_page);
1007 			status = NETIF_RSP_ERROR;
1008 		}
1009 		id = RING_GET_REQUEST(&netif->rx, netif->rx.rsp_prod_pvt)->id;
1010 		notify |= make_rx_response(netif, id, status,
1011 					(unsigned long)m->m_data & PAGE_MASK,
1012 					m->m_pkthdr.len, flags);
1013 
1014 		m_freem(m);
1015 		mcl++;
1016 		gop++;
1017 	}
1018 
1019 	if (notify)
1020 		notify_remote_via_irq(netif->irq);
1021 
1022 	return pkts_dequeued;
1023 }
1024 
1025 static void
1026 rx_task_timer(void *arg)
1027 {
1028 	DDPRINTF("\n");
1029 	taskqueue_enqueue(taskqueue_swi, &net_rx_task);
1030 }
1031 
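/*
 * Task handler for host-to-guest traffic.  Interfaces are serviced
 * round-robin; if we come back around to an interface that made no
 * progress last time (last_zero_work), back off and re-arm the one-tick
 * callout instead of spinning.
 */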
1032 static void
1033 net_rx_action(void *context, int pending)
1034 {
1035 	netif_t *netif, *last_zero_work = NULL;
1036 
1037 	DDPRINTF("\n");
1038 
1039 	while ((netif = remove_from_rx_schedule_list())) {
1040 		struct ifnet *ifp = netif->ifp;
1041 
1042 		if (netif == last_zero_work) {
1043 			if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1044 				add_to_rx_schedule_list_tail(netif);
1045 			netif_put(netif);
1046 			if (!STAILQ_EMPTY(&rx_sched_list))
1047 				callout_reset(&rx_task_callout, 1, rx_task_timer, NULL);
1048 			break;
1049 		}
1050 
1051 		if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
1052 			if (netif_rx(netif))
1053 				last_zero_work = NULL;
1054 			else if (!last_zero_work)
1055 				last_zero_work = netif;
1056 			if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1057 				add_to_rx_schedule_list_tail(netif);
1058 		}
1059 
1060 		netif_put(netif);
1061 	}
1062 }
1063 
1064 static void
1065 netback_start(struct ifnet *ifp)
1066 {
1067 	netif_t *netif = (netif_t *)ifp->if_softc;
1068 
1069 	DDPRINTF("%s\n", IFNAME(netif));
1070 
1071 	add_to_rx_schedule_list_tail(netif);
1072 	taskqueue_enqueue(taskqueue_swi, &net_rx_task);
1073 }
1074 
1075 /* Map a grant ref to a ring */
1076 static int
1077 map_ring(grant_ref_t ref, domid_t dom, struct ring_ref *ring)
1078 {
1079 	struct gnttab_map_grant_ref op;
1080 
1081 	ring->va = kmem_alloc_nofault(kernel_map, PAGE_SIZE);
1082 	if (ring->va == 0)
1083 		return ENOMEM;
1084 
1085 	op.host_addr = ring->va;
1086 	op.flags = GNTMAP_host_map;
1087 	op.ref = ref;
1088 	op.dom = dom;
1089 	HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1);
1090 	if (op.status) {
1091 		WPRINTF("grant table op err=%d\n", op.status);
1092 		kmem_free(kernel_map, ring->va, PAGE_SIZE);
1093 		ring->va = 0;
1094 		return EACCES;
1095 	}
1096 
1097 	ring->handle = op.handle;
1098 	ring->bus_addr = op.dev_bus_addr;
1099 
1100 	return 0;
1101 }
1102 
1103 /* Unmap grant ref for a ring */
1104 static void
1105 unmap_ring(struct ring_ref *ring)
1106 {
1107 	struct gnttab_unmap_grant_ref op;
1108 
1109 	op.host_addr = ring->va;
1110 	op.dev_bus_addr = ring->bus_addr;
1111 	op.handle = ring->handle;
1112 	HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1);
1113 	if (op.status)
1114 		WPRINTF("grant table op err=%d\n", op.status);
1115 
1116 	kmem_free(kernel_map, ring->va, PAGE_SIZE);
1117 	ring->va = 0;
1118 }
1119 
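/*
 * Read tx-ring-ref, rx-ring-ref and event-channel from the frontend's
 * xenstore directory, map both shared rings into our address space and
 * bind the interdomain event channel to netback_intr().
 */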
1120 static int
1121 connect_rings(netif_t *netif)
1122 {
1123 	struct xenbus_device *xdev = netif->xdev;
1124 	netif_tx_sring_t *txs;
1125 	netif_rx_sring_t *rxs;
1126 	unsigned long tx_ring_ref, rx_ring_ref;
1127 	evtchn_port_t evtchn;
1128 	evtchn_op_t op = { .cmd = EVTCHNOP_bind_interdomain };
1129 	int err;
1130 
1131 	// Grab frontend data and map its memory
1132 	err = xenbus_gather(NULL, xdev->otherend,
1133 			"tx-ring-ref", "%lu", &tx_ring_ref,
1134 		    "rx-ring-ref", "%lu", &rx_ring_ref,
1135 		    "event-channel", "%u", &evtchn, NULL);
1136 	if (err) {
1137 		xenbus_dev_fatal(xdev, err,
1138 			"reading %s/ring-ref and event-channel",
1139 			xdev->otherend);
1140 		return err;
1141 	}
1142 
1143 	err = map_ring(tx_ring_ref, netif->domid, &netif->tx_ring_ref);
1144 	if (err) {
1145 		xenbus_dev_fatal(xdev, err, "mapping tx ring");
1146 		return err;
1147 	}
1148 	txs = (netif_tx_sring_t *)netif->tx_ring_ref.va;
1149 	BACK_RING_INIT(&netif->tx, txs, PAGE_SIZE);
1150 
1151 	err = map_ring(rx_ring_ref, netif->domid, &netif->rx_ring_ref);
1152 	if (err) {
1153 		unmap_ring(&netif->tx_ring_ref);
1154 		xenbus_dev_fatal(xdev, err, "mapping rx ring");
1155 		return err;
1156 	}
1157 	rxs = (netif_rx_sring_t *)netif->rx_ring_ref.va;
1158 	BACK_RING_INIT(&netif->rx, rxs, PAGE_SIZE);
1159 
1160 	op.u.bind_interdomain.remote_dom = netif->domid;
1161 	op.u.bind_interdomain.remote_port = evtchn;
1162 	err = HYPERVISOR_event_channel_op(&op);
1163 	if (err) {
1164 		unmap_ring(&netif->tx_ring_ref);
1165 		unmap_ring(&netif->rx_ring_ref);
1166 		xenbus_dev_fatal(xdev, err, "binding event channel");
1167 		return err;
1168 	}
1169 	netif->evtchn = op.u.bind_interdomain.local_port;
1170 
1171 	/* bind evtchn to irq handler */
1172 	netif->irq =
1173 		bind_evtchn_to_irqhandler(netif->evtchn, "netback",
1174 			netback_intr, netif, INTR_TYPE_NET|INTR_MPSAFE, &netif->irq_cookie);
1175 
1176 	netif->rings_connected = 1;
1177 
1178 	DPRINTF("%s connected! evtchn=%d irq=%d\n",
1179 		IFNAME(netif), netif->evtchn, netif->irq);
1180 
1181 	return 0;
1182 }
1183 
1184 static void
1185 disconnect_rings(netif_t *netif)
1186 {
1187 	DPRINTF("\n");
1188 
1189 	if (netif->rings_connected) {
1190 		unbind_from_irqhandler(netif->irq, netif->irq_cookie);
1191 		netif->irq = 0;
1192 		unmap_ring(&netif->tx_ring_ref);
1193 		unmap_ring(&netif->rx_ring_ref);
1194 		netif->rings_connected = 0;
1195 	}
1196 }
1197 
1198 static void
1199 connect(netif_t *netif)
1200 {
1201 	if (!netif->xdev ||
1202 		!netif->attached ||
1203 		netif->frontend_state != XenbusStateConnected) {
1204 		return;
1205 	}
1206 
1207 	if (!connect_rings(netif)) {
1208 		xenbus_switch_state(netif->xdev, NULL, XenbusStateConnected);
1209 
1210 		/* Turn on interface */
1211 		netif->ifp->if_drv_flags |= IFF_DRV_RUNNING;
1212 		netif->ifp->if_flags |= IFF_UP;
1213 	}
1214 }
1215 
1216 static int
1217 netback_remove(struct xenbus_device *xdev)
1218 {
1219 	netif_t *netif = xdev->data;
1220 	device_t ndev;
1221 
1222 	DPRINTF("remove %s\n", xdev->nodename);
1223 
1224 	if ((ndev = netif->ndev)) {
1225 		netif->ndev = NULL;
1226 		mtx_lock(&Giant);
1227 		device_detach(ndev);
1228 		mtx_unlock(&Giant);
1229 	}
1230 
1231 	xdev->data = NULL;
1232 	netif->xdev = NULL;
1233 	netif_put(netif);
1234 
1235 	return 0;
1236 }
1237 
1238 /**
1239  * Entry point to this code when a new device is created.  Allocate the basic
1240  * structures, create the vif newbus device, and switch to InitWait.  The
1241  * shared rings are mapped later, once the frontend reports Connected.
1242  */
1243 static int
1244 netback_probe(struct xenbus_device *xdev, const struct xenbus_device_id *id)
1245 {
1246 	int err;
1247 	long handle;
1248 	char *bridge;
1249 
1250 	DPRINTF("node=%s\n", xdev->nodename);
1251 
1252 	/* Grab the handle */
1253 	err = xenbus_scanf(NULL, xdev->nodename, "handle", "%li", &handle);
1254 	if (err != 1) {
1255 		xenbus_dev_fatal(xdev, err, "reading handle");
1256 		return err;
1257 	}
1258 
1259 	/* Check for bridge */
1260 	bridge = xenbus_read(NULL, xdev->nodename, "bridge", NULL);
1261 	if (IS_ERR(bridge))
1262 		bridge = NULL;
1263 
1264 	err = xenbus_switch_state(xdev, NULL, XenbusStateInitWait);
1265 	if (err) {
1266 		xenbus_dev_fatal(xdev, err, "writing switch state");
1267 		return err;
1268 	}
1269 
1270 	err = netif_create(handle, xdev, bridge);
1271 	if (err) {
1272 		xenbus_dev_fatal(xdev, err, "creating netif");
1273 		return err;
1274 	}
1275 
1276 	err = vif_add_dev(xdev);
1277 	if (err) {
1278 		netif_put((netif_t *)xdev->data);
1279 		xenbus_dev_fatal(xdev, err, "adding vif device");
1280 		return err;
1281 	}
1282 
1283 	return 0;
1284 }
1285 
1286 /**
1287  * We are reconnecting to the frontend, due to a suspend/resume, or a frontend
1288  * driver restart.  We tear down our netif structure and recreate it, but
1289  * leave the device-layer structures intact so that this is transparent to the
1290  * rest of the kernel.
1291  */
1292 static int netback_resume(struct xenbus_device *xdev)
1293 {
1294 	DPRINTF("node=%s\n", xdev->nodename);
1295 	return 0;
1296 }
1297 
1298 
1299 /**
1300  * Callback received when the frontend's state changes.
1301  */
1302 static void frontend_changed(struct xenbus_device *xdev,
1303 							 XenbusState frontend_state)
1304 {
1305 	netif_t *netif = xdev->data;
1306 
1307 	DPRINTF("state=%d\n", frontend_state);
1308 
1309 	netif->frontend_state = frontend_state;
1310 
1311 	switch (frontend_state) {
1312 	case XenbusStateInitialising:
1313 	case XenbusStateInitialised:
1314 		break;
1315 	case XenbusStateConnected:
1316 		connect(netif);
1317 		break;
1318 	case XenbusStateClosing:
1319 		xenbus_switch_state(xdev, NULL, XenbusStateClosing);
1320 		break;
1321 	case XenbusStateClosed:
1322 		xenbus_remove_device(xdev);
1323 		break;
1324 	case XenbusStateUnknown:
1325 	case XenbusStateInitWait:
1326 		xenbus_dev_fatal(xdev, EINVAL, "saw state %d at frontend",
1327 						 frontend_state);
1328 		break;
1329 	}
1330 }
1331 
1332 /* ** Driver registration ** */
1333 
1334 static struct xenbus_device_id netback_ids[] = {
1335 	{ "vif" },
1336 	{ "" }
1337 };
1338 
1339 static struct xenbus_driver netback = {
1340 	.name = "netback",
1341 	.ids = netback_ids,
1342 	.probe = netback_probe,
1343 	.remove = netback_remove,
1344 	.resume= netback_resume,
1345 	.otherend_changed = frontend_changed,
1346 };
1347 
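/*
 * Driver-wide initialization, run from SYSINIT: reserve the VA window
 * used to map frontend TX pages, seed the pending ring, set up the TX/RX
 * tasks and scheduling locks, and register the backend with xenbus.
 */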
1348 static void
1349 netback_init(void *unused)
1350 {
1351 	callout_init(&rx_task_callout, CALLOUT_MPSAFE);
1352 
1353 	mmap_vstart = alloc_empty_page_range(MAX_PENDING_REQS);
1354 	BUG_ON(!mmap_vstart);
1355 
1356 	pending_cons = 0;
1357 	for (pending_prod = 0; pending_prod < MAX_PENDING_REQS; pending_prod++)
1358 		pending_ring[pending_prod] = pending_prod;
1359 
1360 	TASK_INIT(&net_tx_task, 0, net_tx_action, NULL);
1361 	TASK_INIT(&net_rx_task, 0, net_rx_action, NULL);
1362 	mtx_init(&tx_sched_list_lock, "nb_tx_sched_lock", "netback tx sched lock", MTX_DEF);
1363 	mtx_init(&rx_sched_list_lock, "nb_rx_sched_lock", "netback rx sched lock", MTX_DEF);
1364 
1365 	DPRINTF("registering %s\n", netback.name);
1366 
1367 	xenbus_register_backend(&netback);
1368 }
1369 
1370 SYSINIT(xnbedev, SI_SUB_PSEUDO, SI_ORDER_ANY, netback_init, NULL);
1371 
1372 static int
1373 vif_add_dev(struct xenbus_device *xdev)
1374 {
1375 	netif_t *netif = xdev->data;
1376 	device_t nexus, ndev;
1377 	devclass_t dc;
1378 	int err = 0;
1379 
1380 	mtx_lock(&Giant);
1381 
1382 	/* We will add a vif device as a child of nexus0 (for now) */
1383 	if (!(dc = devclass_find("nexus")) ||
1384 		!(nexus = devclass_get_device(dc, 0))) {
1385 		WPRINTF("could not find nexus0!\n");
1386 		err = ENOENT;
1387 		goto done;
1388 	}
1389 
1390 
1391 	/* Create a newbus device representing the vif */
1392 	ndev = BUS_ADD_CHILD(nexus, 0, "vif", netif->ifp->if_dunit);
1393 	if (!ndev) {
1394 		WPRINTF("could not create newbus device %s!\n", IFNAME(netif));
1395 		err = EFAULT;
1396 		goto done;
1397 	}
1398 
1399 	netif_get(netif);
1400 	device_set_ivars(ndev, netif);
1401 	netif->ndev = ndev;
1402 
1403 	device_probe_and_attach(ndev);
1404 
1405  done:
1406 
1407 	mtx_unlock(&Giant);
1408 
1409 	return err;
1410 }
1411 
1412 enum {
1413 	VIF_SYSCTL_DOMID,
1414 	VIF_SYSCTL_HANDLE,
1415 	VIF_SYSCTL_TXRING,
1416 	VIF_SYSCTL_RXRING,
1417 };
1418 
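/*
 * Format the shared-ring producer/consumer indices for the txring and
 * rxring sysctls.  The caller frees the returned buffer.
 */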
1419 static char *
1420 vif_sysctl_ring_info(netif_t *netif, int cmd)
1421 {
1422 	char *buf = malloc(256, M_DEVBUF, M_WAITOK);
1423 	if (buf) {
1424 		if (!netif->rings_connected)
1425 			sprintf(buf, "rings not connected\n");
1426 		else if (cmd == VIF_SYSCTL_TXRING) {
1427 			netif_tx_back_ring_t *tx = &netif->tx;
1428 			sprintf(buf, "nr_ents=%x req_cons=%x"
1429 					" req_prod=%x req_event=%x"
1430 					" rsp_prod=%x rsp_event=%x",
1431 					tx->nr_ents, tx->req_cons,
1432 					tx->sring->req_prod, tx->sring->req_event,
1433 					tx->sring->rsp_prod, tx->sring->rsp_event);
1434 		} else {
1435 			netif_rx_back_ring_t *rx = &netif->rx;
1436 			sprintf(buf, "nr_ents=%x req_cons=%x"
1437 					" req_prod=%x req_event=%x"
1438 					" rsp_prod=%x rsp_event=%x",
1439 					rx->nr_ents, rx->req_cons,
1440 					rx->sring->req_prod, rx->sring->req_event,
1441 					rx->sring->rsp_prod, rx->sring->rsp_event);
1442 		}
1443 	}
1444 	return buf;
1445 }
1446 
1447 static int
1448 vif_sysctl_handler(SYSCTL_HANDLER_ARGS)
1449 {
1450 	device_t dev = (device_t)arg1;
1451 	netif_t *netif = (netif_t *)device_get_ivars(dev);
1452 	const char *value;
1453 	char *buf = NULL;
1454 	int err;
1455 
1456 	switch (arg2) {
1457 	case VIF_SYSCTL_DOMID:
1458 		return sysctl_handle_int(oidp, NULL, netif->domid, req);
1459 	case VIF_SYSCTL_HANDLE:
1460 		return sysctl_handle_int(oidp, NULL, netif->handle, req);
1461 	case VIF_SYSCTL_TXRING:
1462 	case VIF_SYSCTL_RXRING:
1463 		value = buf = vif_sysctl_ring_info(netif, arg2);
1464 		break;
1465 	default:
1466 		return (EINVAL);
1467 	}
1468 
1469 	err = SYSCTL_OUT(req, value, strlen(value));
1470 	if (buf != NULL)
1471 		free(buf, M_DEVBUF);
1472 
1473 	return err;
1474 }
1475 
1476 /* Newbus vif device driver probe */
1477 static int
1478 vif_probe(device_t dev)
1479 {
1480 	DDPRINTF("vif%d\n", device_get_unit(dev));
1481 	return 0;
1482 }
1483 
1484 /* Newbus vif device driver attach */
1485 static int
1486 vif_attach(device_t dev)
1487 {
1488 	netif_t *netif = (netif_t *)device_get_ivars(dev);
1489 	uint8_t mac[ETHER_ADDR_LEN];
1490 
1491 	DDPRINTF("%s\n", IFNAME(netif));
1492 
1493 	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
1494 	    OID_AUTO, "domid", CTLTYPE_INT|CTLFLAG_RD,
1495 	    dev, VIF_SYSCTL_DOMID, vif_sysctl_handler, "I",
1496 	    "domid of frontend");
1497 	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
1498 	    OID_AUTO, "handle", CTLTYPE_INT|CTLFLAG_RD,
1499 	    dev, VIF_SYSCTL_HANDLE, vif_sysctl_handler, "I",
1500 	    "handle of frontend");
1501 #ifdef XEN_NETBACK_DEBUG
1502 	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
1503 	    OID_AUTO, "txring", CTLFLAG_RD,
1504 	    dev, VIF_SYSCTL_TXRING, vif_sysctl_handler, "A",
1505 	    "tx ring info");
1506 	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
1507 	    OID_AUTO, "rxring", CTLFLAG_RD,
1508 	    dev, VIF_SYSCTL_RXRING, vif_sysctl_handler, "A",
1509 	    "rx ring info");
1510 #endif
1511 
1512 	memset(mac, 0xff, sizeof(mac));
1513 	mac[0] &= ~0x01;
1514 
1515 	ether_ifattach(netif->ifp, mac);
1516 	netif->attached = 1;
1517 
1518 	connect(netif);
1519 
1520 	if (netif->bridge) {
1521 		DPRINTF("Adding %s to bridge %s\n", IFNAME(netif), netif->bridge);
1522 		int err = add_to_bridge(netif->ifp, netif->bridge);
1523 		if (err) {
1524 			WPRINTF("Error adding %s to %s; err=%d\n",
1525 				IFNAME(netif), netif->bridge, err);
1526 		}
1527 	}
1528 
1529 	return bus_generic_attach(dev);
1530 }
1531 
1532 /* Newbus vif device driver detach */
1533 static int
1534 vif_detach(device_t dev)
1535 {
1536 	netif_t *netif = (netif_t *)device_get_ivars(dev);
1537 	struct ifnet *ifp = netif->ifp;
1538 
1539 	DDPRINTF("%s\n", IFNAME(netif));
1540 
1541 	/* Tell the stack that the interface is no longer active */
1542 	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
1543 
1544 	ether_ifdetach(ifp);
1545 
1546 	bus_generic_detach(dev);
1547 
1548 	netif->attached = 0;
1549 
1550 	netif_put(netif);
1551 
1552 	return 0;
1553 }
1554 
1555 static device_method_t vif_methods[] = {
1556 	/* Device interface */
1557 	DEVMETHOD(device_probe,		vif_probe),
1558 	DEVMETHOD(device_attach, 	vif_attach),
1559 	DEVMETHOD(device_detach,	vif_detach),
1560 	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
1561 	DEVMETHOD(device_suspend,	bus_generic_suspend),
1562 	DEVMETHOD(device_resume,	bus_generic_resume),
1563 	{0, 0}
1564 };
1565 
1566 static devclass_t vif_devclass;
1567 
1568 static driver_t vif_driver = {
1569 	"vif",
1570 	vif_methods,
1571 	0,
1572 };
1573 
1574 DRIVER_MODULE(vif, nexus, vif_driver, vif_devclass, 0, 0);
1575 
1576 
1577 /*
1578  * Local variables:
1579  * mode: C
1580  * c-set-style: "BSD"
1581  * c-basic-offset: 4
1582  * tab-width: 4
1583  * indent-tabs-mode: t
1584  * End:
1585  */
1586