xref: /freebsd/sys/dev/xen/netback/netback.c (revision 70e0bbedef95258a4dadc996d641a9bebd3f107d)
1 /*
2  * Copyright (c) 2006, Cisco Systems, Inc.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  *
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  * 3. Neither the name of Cisco Systems, Inc. nor the names of its contributors
15  *    may be used to endorse or promote products derived from this software
16  *    without specific prior written permission.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
22  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28  * POSSIBILITY OF SUCH DAMAGE.
29  */
30 
31 #include <sys/cdefs.h>
32 __FBSDID("$FreeBSD$");
33 #include "opt_sctp.h"
34 
35 #include <sys/param.h>
36 #include <sys/systm.h>
37 #include <sys/sockio.h>
38 #include <sys/mbuf.h>
39 #include <sys/malloc.h>
40 #include <sys/kernel.h>
41 #include <sys/socket.h>
42 #include <sys/queue.h>
43 #include <sys/taskqueue.h>
44 
45 #include <sys/module.h>
46 #include <sys/bus.h>
47 #include <sys/sysctl.h>
48 
49 #include <net/if.h>
50 #include <net/if_arp.h>
51 #include <net/if_types.h>
52 #include <net/ethernet.h>
53 #include <net/if_bridgevar.h>
54 
55 #include <netinet/in_systm.h>
56 #include <netinet/in.h>
57 #include <netinet/in_var.h>
58 #include <netinet/ip.h>
59 #include <netinet/tcp.h>
60 #include <netinet/udp.h>
61 #ifdef SCTP
62 #include <netinet/sctp.h>
63 #include <netinet/sctp_crc32.h>
64 #endif
65 
66 #include <vm/vm_extern.h>
67 #include <vm/vm_kern.h>
68 
69 #include <machine/in_cksum.h>
70 #include <machine/xen-os.h>
71 #include <machine/hypervisor.h>
72 #include <machine/hypervisor-ifs.h>
73 #include <machine/xen_intr.h>
74 #include <machine/evtchn.h>
75 #include <machine/xenbus.h>
76 #include <machine/gnttab.h>
77 #include <machine/xen-public/memory.h>
78 #include <dev/xen/xenbus/xenbus_comms.h>
79 
80 
81 #ifdef XEN_NETBACK_DEBUG
82 #define DPRINTF(fmt, args...) \
83     printf("netback (%s:%d): " fmt, __FUNCTION__, __LINE__, ##args)
84 #else
85 #define DPRINTF(fmt, args...) ((void)0)
86 #endif
87 
88 #ifdef XEN_NETBACK_DEBUG_LOTS
89 #define DDPRINTF(fmt, args...) \
90     printf("netback (%s:%d): " fmt, __FUNCTION__, __LINE__, ##args)
91 #define DPRINTF_MBUF(_m) print_mbuf(_m, 0)
92 #define DPRINTF_MBUF_LEN(_m, _len) print_mbuf(_m, _len)
93 #else
94 #define DDPRINTF(fmt, args...) ((void)0)
95 #define DPRINTF_MBUF(_m) ((void)0)
96 #define DPRINTF_MBUF_LEN(_m, _len) ((void)0)
97 #endif
98 
99 #define WPRINTF(fmt, args...) \
100     printf("netback (%s:%d): " fmt, __FUNCTION__, __LINE__, ##args)
101 
102 #define ARRAY_SIZE(x) (sizeof(x)/sizeof(x[0]))
103 #define BUG_ON PANIC_IF
104 
105 #define IFNAME(_np) (_np)->ifp->if_xname
106 
107 #define NET_TX_RING_SIZE __RING_SIZE((netif_tx_sring_t *)0, PAGE_SIZE)
108 #define NET_RX_RING_SIZE __RING_SIZE((netif_rx_sring_t *)0, PAGE_SIZE)
109 
110 struct ring_ref {
111 	vm_offset_t va;
112 	grant_handle_t handle;
113 	uint64_t bus_addr;
114 };
115 
116 typedef struct netback_info {
117 
118 	/* Schedule lists */
119 	STAILQ_ENTRY(netback_info) next_tx;
120 	STAILQ_ENTRY(netback_info) next_rx;
121 	int on_tx_sched_list;
122 	int on_rx_sched_list;
123 
124 	struct xenbus_device *xdev;
125 	XenbusState frontend_state;
126 
127 	domid_t domid;
128 	int handle;
129 	char *bridge;
130 
131 	int rings_connected;
132 	struct ring_ref tx_ring_ref;
133 	struct ring_ref rx_ring_ref;
134 	netif_tx_back_ring_t tx;
135 	netif_rx_back_ring_t rx;
136 	evtchn_port_t evtchn;
137 	int irq;
138 	void *irq_cookie;
139 
140 	struct ifnet *ifp;
141 	int ref_cnt;
142 
143 	device_t ndev;
144 	int attached;
145 } netif_t;
146 
147 
148 #define MAX_PENDING_REQS 256
149 #define PKT_PROT_LEN 64
150 
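/*
 * Each in-flight transmit request from a frontend occupies one slot in
 * pending_tx_info, identified by an index drawn from pending_ring.
 * NR_PENDING_REQS is the number of slots currently in use.
 */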
151 static struct {
152 	netif_tx_request_t req;
153 	netif_t *netif;
154 } pending_tx_info[MAX_PENDING_REQS];
155 static uint16_t pending_ring[MAX_PENDING_REQS];
156 typedef unsigned int PEND_RING_IDX;
157 #define MASK_PEND_IDX(_i) ((_i)&(MAX_PENDING_REQS-1))
158 static PEND_RING_IDX pending_prod, pending_cons;
159 #define NR_PENDING_REQS (MAX_PENDING_REQS - pending_prod + pending_cons)
160 
161 static unsigned long mmap_vstart;
162 #define MMAP_VADDR(_req) (mmap_vstart + ((_req) * PAGE_SIZE))
163 
164 /* Freed TX mbufs get batched on this ring before return to pending_ring. */
165 static uint16_t dealloc_ring[MAX_PENDING_REQS];
166 static PEND_RING_IDX dealloc_prod, dealloc_cons;
167 
168 static multicall_entry_t rx_mcl[NET_RX_RING_SIZE+1];
169 static mmu_update_t rx_mmu[NET_RX_RING_SIZE];
170 static gnttab_transfer_t grant_rx_op[NET_RX_RING_SIZE];
171 
172 static grant_handle_t grant_tx_handle[MAX_PENDING_REQS];
173 static gnttab_unmap_grant_ref_t tx_unmap_ops[MAX_PENDING_REQS];
174 static gnttab_map_grant_ref_t tx_map_ops[MAX_PENDING_REQS];
175 
176 static struct task net_tx_task, net_rx_task;
177 static struct callout rx_task_callout;
178 
179 static STAILQ_HEAD(netback_tx_sched_list, netback_info) tx_sched_list =
180 	STAILQ_HEAD_INITIALIZER(tx_sched_list);
181 static STAILQ_HEAD(netback_rx_sched_list, netback_info) rx_sched_list =
182 	STAILQ_HEAD_INITIALIZER(rx_sched_list);
183 static struct mtx tx_sched_list_lock;
184 static struct mtx rx_sched_list_lock;
185 
186 static int vif_unit_maker = 0;
187 
188 /* Protos */
189 static void netback_start(struct ifnet *ifp);
190 static int netback_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data);
191 static int vif_add_dev(struct xenbus_device *xdev);
192 static void disconnect_rings(netif_t *netif);
193 
194 #ifdef XEN_NETBACK_DEBUG_LOTS
195 /* Debug code to display the contents of an mbuf */
196 static void
197 print_mbuf(struct mbuf *m, int max)
198 {
199 	int i, j=0;
200 	printf("mbuf %08x len = %d", (unsigned int)m, m->m_pkthdr.len);
201 	for (; m; m = m->m_next) {
202 		unsigned char *d = m->m_data;
203 		for (i=0; i < m->m_len; i++) {
204 			if (max && j == max)
205 				break;
206 			if ((j++ % 16) == 0)
207 				printf("\n%04x:", j);
208 			printf(" %02x", d[i]);
209 		}
210 	}
211 	printf("\n");
212 }
213 #endif
214 
215 
216 #define MAX_MFN_ALLOC 64
217 static unsigned long mfn_list[MAX_MFN_ALLOC];
218 static unsigned int alloc_index = 0;
219 
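/*
 * Hand out a machine frame from a small local cache, refilling the cache
 * from the hypervisor (XENMEM_increase_reservation) when it is empty.
 * Returns 0 if no frame could be obtained.
 */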
220 static unsigned long
221 alloc_mfn(void)
222 {
223 	unsigned long mfn = 0;
224 	struct xen_memory_reservation reservation = {
225 		.extent_start = mfn_list,
226 		.nr_extents   = MAX_MFN_ALLOC,
227 		.extent_order = 0,
228 		.domid        = DOMID_SELF
229 	};
230 	if ( unlikely(alloc_index == 0) )
231 		alloc_index = HYPERVISOR_memory_op(
232 			XENMEM_increase_reservation, &reservation);
233 	if ( alloc_index != 0 )
234 		mfn = mfn_list[--alloc_index];
235 	return mfn;
236 }
237 
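/*
 * Allocate nr_pages of kernel virtual address space and give the backing
 * machine frames back to the hypervisor (XENMEM_decrease_reservation),
 * leaving an empty VA range into which foreign pages can later be mapped.
 * Returns the start address, or 0 on failure.
 */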
238 static unsigned long
239 alloc_empty_page_range(unsigned long nr_pages)
240 {
241 	void *pages;
242 	int i = 0, j = 0;
243 	multicall_entry_t mcl[17];
244 	unsigned long mfn_list[16];
245 	struct xen_memory_reservation reservation = {
246 		.extent_start = mfn_list,
247 		.nr_extents   = 0,
248 		.address_bits = 0,
249 		.extent_order = 0,
250 		.domid        = DOMID_SELF
251 	};
252 
253 	pages = malloc(nr_pages*PAGE_SIZE, M_DEVBUF, M_NOWAIT);
254 	if (pages == NULL)
255 		return 0;
256 
257 	memset(mcl, 0, sizeof(mcl));
258 
259 	while (i < nr_pages) {
260 		unsigned long va = (unsigned long)pages + (i++ * PAGE_SIZE);
261 
262 		mcl[j].op = __HYPERVISOR_update_va_mapping;
263 		mcl[j].args[0] = va;
264 
265 		mfn_list[j++] = vtomach(va) >> PAGE_SHIFT;
266 
267 		xen_phys_machine[(vtophys(va) >> PAGE_SHIFT)] = INVALID_P2M_ENTRY;
268 
269 		if (j == 16 || i == nr_pages) {
270 			mcl[j-1].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_LOCAL;
271 
272 			reservation.nr_extents = j;
273 
274 			mcl[j].op = __HYPERVISOR_memory_op;
275 			mcl[j].args[0] = XENMEM_decrease_reservation;
276 			mcl[j].args[1] =  (unsigned long)&reservation;
277 
278 			(void)HYPERVISOR_multicall(mcl, j+1);
279 
280 			mcl[j-1].args[MULTI_UVMFLAGS_INDEX] = 0;
281 			j = 0;
282 		}
283 	}
284 
285 	return (unsigned long)pages;
286 }
287 
288 #ifdef XEN_NETBACK_FIXUP_CSUM
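/*
 * Complete a delayed TCP/UDP (or SCTP) checksum in software for a packet
 * that still has csum_flags set, e.g. one being bridged from one domain
 * straight to another.
 */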
289 static void
290 fixup_checksum(struct mbuf *m)
291 {
292 	struct ether_header *eh = mtod(m, struct ether_header *);
293 	struct ip *ip = (struct ip *)(eh + 1);
294 	int iphlen = ip->ip_hl << 2;
295 	int iplen = ntohs(ip->ip_len);
296 
297 	if ((m->m_pkthdr.csum_flags & CSUM_TCP)) {
298 		struct tcphdr *th = (struct tcphdr *)((caddr_t)ip + iphlen);
299 		th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
300 			htons(IPPROTO_TCP + (iplen - iphlen)));
301 		th->th_sum = in_cksum_skip(m, iplen + sizeof(*eh), sizeof(*eh) + iphlen);
302 		m->m_pkthdr.csum_flags &= ~CSUM_TCP;
303 #ifdef SCTP
304 	} else if ((m->m_pkthdr.csum_flags & CSUM_SCTP)) {
305 		sctp_delayed_cksum(m, iphlen);
306 		m->m_pkthdr.csum_flags &= ~CSUM_SCTP;
307 #endif
308 	} else {
309 		u_short csum;
310 		struct udphdr *uh = (struct udphdr *)((caddr_t)ip + iphlen);
311 		uh->uh_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
312 			htons(IPPROTO_UDP + (iplen - iphlen)));
313 		if ((csum = in_cksum_skip(m, iplen + sizeof(*eh), sizeof(*eh) + iphlen)) == 0)
314 			csum = 0xffff;
315 		uh->uh_sum = csum;
316 		m->m_pkthdr.csum_flags &= ~CSUM_UDP;
317 	}
318 }
319 #endif
320 
321 /* Add the interface to the specified bridge */
322 static int
323 add_to_bridge(struct ifnet *ifp, char *bridge)
324 {
325 	struct ifdrv ifd;
326 	struct ifbreq ifb;
327 	struct ifnet *ifp_bridge = ifunit(bridge);
328 
329 	if (!ifp_bridge)
330 		return ENOENT;
331 
332 	bzero(&ifd, sizeof(ifd));
333 	bzero(&ifb, sizeof(ifb));
334 
335 	strcpy(ifb.ifbr_ifsname, ifp->if_xname);
336 	strcpy(ifd.ifd_name, ifp->if_xname);
337 	ifd.ifd_cmd = BRDGADD;
338 	ifd.ifd_len = sizeof(ifb);
339 	ifd.ifd_data = &ifb;
340 
341 	return bridge_ioctl_kern(ifp_bridge, SIOCSDRVSPEC, &ifd);
342 
343 }
344 
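/* Allocate and initialize the netif and its ifnet for a new frontend. */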
345 static int
346 netif_create(int handle, struct xenbus_device *xdev, char *bridge)
347 {
348 	netif_t *netif;
349 	struct ifnet *ifp;
350 
351 	netif = (netif_t *)malloc(sizeof(*netif), M_DEVBUF, M_NOWAIT | M_ZERO);
352 	if (!netif)
353 		return ENOMEM;
354 
355 	netif->ref_cnt = 1;
356 	netif->handle = handle;
357 	netif->domid = xdev->otherend_id;
358 	netif->xdev = xdev;
359 	netif->bridge = bridge;
360 	xdev->data = netif;
361 
362 	/* Set up ifnet structure */
363 	ifp = netif->ifp = if_alloc(IFT_ETHER);
364 	if (!ifp) {
365 		if (bridge)
366 			free(bridge, M_DEVBUF);
367 		free(netif, M_DEVBUF);
368 		return ENOMEM;
369 	}
370 
371 	ifp->if_softc = netif;
372 	if_initname(ifp, "vif",
373 		atomic_fetchadd_int(&vif_unit_maker, 1) /* ifno */ );
374 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX;
375 	ifp->if_output = ether_output;
376 	ifp->if_start = netback_start;
377 	ifp->if_ioctl = netback_ioctl;
378 	ifp->if_snd.ifq_maxlen = NET_TX_RING_SIZE - 1;
379 
380 	DPRINTF("Created %s for domid=%d handle=%d\n", IFNAME(netif), netif->domid, netif->handle);
381 
382 	return 0;
383 }
384 
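/*
 * Reference counting: when the last reference is dropped the rings are
 * disconnected and the ifnet, bridge name and netif itself are freed.
 */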
385 static void
386 netif_get(netif_t *netif)
387 {
388 	atomic_add_int(&netif->ref_cnt, 1);
389 }
390 
391 static void
392 netif_put(netif_t *netif)
393 {
394 	if (atomic_fetchadd_int(&netif->ref_cnt, -1) == 1) {
395 		DPRINTF("%s\n", IFNAME(netif));
396 		disconnect_rings(netif);
397 		if (netif->ifp) {
398 			if_free(netif->ifp);
399 			netif->ifp = NULL;
400 		}
401 		if (netif->bridge)
402 			free(netif->bridge, M_DEVBUF);
403 		free(netif, M_DEVBUF);
404 	}
405 }
406 
407 static int
408 netback_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
409 {
410 	switch (cmd) {
411 	case SIOCSIFFLAGS:
412 		DDPRINTF("%s cmd=SIOCSIFFLAGS flags=%x\n",
413 			IFNAME((struct netback_info *)ifp->if_softc), ((struct ifreq *)data)->ifr_flags);
414 		return 0;
415 	}
416 
417 	DDPRINTF("%s cmd=%lx\n", IFNAME((struct netback_info *)ifp->if_softc), cmd);
418 
419 	return ether_ioctl(ifp, cmd, data);
420 }
421 
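/*
 * Kick the TX task if fewer than half of the pending slots are in use and
 * at least one netif is waiting to be serviced.
 */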
422 static inline void
423 maybe_schedule_tx_action(void)
424 {
425 	smp_mb();
426 	if ((NR_PENDING_REQS < (MAX_PENDING_REQS/2)) && !STAILQ_EMPTY(&tx_sched_list))
427 		taskqueue_enqueue(taskqueue_swi, &net_tx_task);
428 }
429 
430 /* Removes netif from front of list and does not call netif_put() (caller must) */
431 static netif_t *
432 remove_from_tx_schedule_list(void)
433 {
434 	netif_t *netif;
435 
436 	mtx_lock(&tx_sched_list_lock);
437 
438 	if ((netif = STAILQ_FIRST(&tx_sched_list))) {
439 		STAILQ_REMOVE(&tx_sched_list, netif, netback_info, next_tx);
440 		STAILQ_NEXT(netif, next_tx) = NULL;
441 		netif->on_tx_sched_list = 0;
442 	}
443 
444 	mtx_unlock(&tx_sched_list_lock);
445 
446 	return netif;
447 }
448 
449 /* Adds netif to end of list and calls netif_get() */
450 static void
451 add_to_tx_schedule_list_tail(netif_t *netif)
452 {
453 	if (netif->on_tx_sched_list)
454 		return;
455 
456 	mtx_lock(&tx_sched_list_lock);
457 	if (!netif->on_tx_sched_list && (netif->ifp->if_drv_flags & IFF_DRV_RUNNING)) {
458 		netif_get(netif);
459 		STAILQ_INSERT_TAIL(&tx_sched_list, netif, next_tx);
460 		netif->on_tx_sched_list = 1;
461 	}
462 	mtx_unlock(&tx_sched_list_lock);
463 }
464 
465 /*
466  * Note on CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER:
467  * If this driver is pipelining transmit requests then we can be very
468  * aggressive in avoiding new-packet notifications -- frontend only needs to
469  * send a notification if there are no outstanding unreceived responses.
470  * If we may be buffering transmit buffers for any reason then we must be rather
471  * more conservative and treat this as the final check for pending work.
472  */
473 static void
474 netif_schedule_tx_work(netif_t *netif)
475 {
476 	int more_to_do;
477 
478 #ifdef CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER
479 	more_to_do = RING_HAS_UNCONSUMED_REQUESTS(&netif->tx);
480 #else
481 	RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, more_to_do);
482 #endif
483 
484 	if (more_to_do) {
485 		DDPRINTF("Adding %s to tx sched list\n", IFNAME(netif));
486 		add_to_tx_schedule_list_tail(netif);
487 		maybe_schedule_tx_action();
488 	}
489 }
490 
491 static struct mtx dealloc_lock;
492 MTX_SYSINIT(netback_dealloc, &dealloc_lock, "DEALLOC LOCK", MTX_SPIN | MTX_NOWITNESS);
493 
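/*
 * Queue a finished pending slot on dealloc_ring; the grant is unmapped
 * later by net_tx_action_dealloc() from the TX task.
 */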
494 static void
495 netif_idx_release(uint16_t pending_idx)
496 {
497 	mtx_lock_spin(&dealloc_lock);
498 	dealloc_ring[MASK_PEND_IDX(dealloc_prod++)] = pending_idx;
499 	mtx_unlock_spin(&dealloc_lock);
500 
501 	taskqueue_enqueue(taskqueue_swi, &net_tx_task);
502 }
503 
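/* Post a TX completion status on the frontend's ring, notifying it if needed. */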
504 static void
505 make_tx_response(netif_t *netif,
506 				 uint16_t    id,
507 				 int8_t      st)
508 {
509 	RING_IDX i = netif->tx.rsp_prod_pvt;
510 	netif_tx_response_t *resp;
511 	int notify;
512 
513 	resp = RING_GET_RESPONSE(&netif->tx, i);
514 	resp->id     = id;
515 	resp->status = st;
516 
517 	netif->tx.rsp_prod_pvt = ++i;
518 	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->tx, notify);
519 	if (notify)
520 		notify_remote_via_irq(netif->irq);
521 
522 #ifdef CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER
523 	if (i == netif->tx.req_cons) {
524 		int more_to_do;
525 		RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, more_to_do);
526 		if (more_to_do)
527 			add_to_tx_schedule_list_tail(netif);
528 	}
529 #endif
530 }
531 
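/*
 * Unmap the grants for TX buffers whose mbufs have been freed, send the
 * corresponding responses to the frontends, and recycle the pending slots.
 */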
532 static inline void
533 net_tx_action_dealloc(void)
534 {
535 	gnttab_unmap_grant_ref_t *gop;
536 	uint16_t pending_idx;
537 	PEND_RING_IDX dc, dp;
538 	netif_t *netif;
539 	int ret;
540 
541 	dc = dealloc_cons;
542 	dp = dealloc_prod;
543 
544 	/*
545 	 * Free up any grants we have finished using
546 	 */
547 	gop = tx_unmap_ops;
548 	while (dc != dp) {
549 		pending_idx = dealloc_ring[MASK_PEND_IDX(dc++)];
550 		gop->host_addr    = MMAP_VADDR(pending_idx);
551 		gop->dev_bus_addr = 0;
552 		gop->handle       = grant_tx_handle[pending_idx];
553 		gop++;
554 	}
555 	ret = HYPERVISOR_grant_table_op(
556 		GNTTABOP_unmap_grant_ref, tx_unmap_ops, gop - tx_unmap_ops);
557 	BUG_ON(ret);
558 
559 	while (dealloc_cons != dp) {
560 		pending_idx = dealloc_ring[MASK_PEND_IDX(dealloc_cons++)];
561 
562 		netif = pending_tx_info[pending_idx].netif;
563 
564 		make_tx_response(netif, pending_tx_info[pending_idx].req.id,
565 				 NETIF_RSP_OKAY);
566 
567 		pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
568 
569 		netif_put(netif);
570 	}
571 }
572 
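/* External-storage free callback for TX mbufs; recycles the pending slot. */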
573 static void
574 netif_page_release(void *buf, void *args)
575 {
576 	uint16_t pending_idx = (unsigned int)args;
577 
578 	DDPRINTF("pending_idx=%u\n", pending_idx);
579 
580 	KASSERT(pending_idx < MAX_PENDING_REQS, ("%s: bad index %u", __func__, pending_idx));
581 
582 	netif_idx_release(pending_idx);
583 }
584 
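/*
 * TX task.  "TX" is from the frontend's point of view: pull transmit
 * requests from each scheduled netif, map the granted pages with one
 * batched GNTTABOP_map_grant_ref call, copy the first PKT_PROT_LEN bytes
 * into an mbuf (referencing any remainder in place) and hand the packets
 * to the local stack via if_input().
 */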
585 static void
586 net_tx_action(void *context, int pending)
587 {
588 	struct mbuf *m;
589 	netif_t *netif;
590 	netif_tx_request_t txreq;
591 	uint16_t pending_idx;
592 	RING_IDX i;
593 	gnttab_map_grant_ref_t *mop;
594 	int ret, work_to_do;
595 	struct mbuf *txq = NULL, *txq_last = NULL;
596 
597 	if (dealloc_cons != dealloc_prod)
598 		net_tx_action_dealloc();
599 
600 	mop = tx_map_ops;
601 	while ((NR_PENDING_REQS < MAX_PENDING_REQS) && !STAILQ_EMPTY(&tx_sched_list)) {
602 
603 		/* Get a netif from the list with work to do. */
604 		netif = remove_from_tx_schedule_list();
605 
606 		DDPRINTF("Processing %s (prod=%u, cons=%u)\n",
607 				IFNAME(netif), netif->tx.sring->req_prod, netif->tx.req_cons);
608 
609 		RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, work_to_do);
610 		if (!work_to_do) {
611 			netif_put(netif);
612 			continue;
613 		}
614 
615 		i = netif->tx.req_cons;
616 		rmb(); /* Ensure that we see the request before we copy it. */
617 		memcpy(&txreq, RING_GET_REQUEST(&netif->tx, i), sizeof(txreq));
618 
619 		/* If we want credit-based scheduling, could add it here - WORK */
620 
621 		netif->tx.req_cons++;
622 
623 		netif_schedule_tx_work(netif);
624 
625 		if (unlikely(txreq.size < ETHER_HDR_LEN) ||
626 		    unlikely(txreq.size > (ETHER_MAX_LEN-ETHER_CRC_LEN))) {
627 			WPRINTF("Bad packet size: %d\n", txreq.size);
628 			make_tx_response(netif, txreq.id, NETIF_RSP_ERROR);
629 			netif_put(netif);
630 			continue;
631 		}
632 
633 		/* The payload must not cross a page boundary, as it cannot be fragmented. */
634 		if (unlikely((txreq.offset + txreq.size) >= PAGE_SIZE)) {
635 			WPRINTF("txreq.offset: %x, size: %u, end: %u\n",
636 				txreq.offset, txreq.size,
637 				(txreq.offset & PAGE_MASK) + txreq.size);
638 			make_tx_response(netif, txreq.id, NETIF_RSP_ERROR);
639 			netif_put(netif);
640 			continue;
641 		}
642 
643 		pending_idx = pending_ring[MASK_PEND_IDX(pending_cons)];
644 
645 		MGETHDR(m, M_DONTWAIT, MT_DATA);
646 		if (!m) {
647 			WPRINTF("Failed to allocate mbuf\n");
648 			make_tx_response(netif, txreq.id, NETIF_RSP_ERROR);
649 			netif_put(netif);
650 			break;
651 		}
652 		m->m_pkthdr.rcvif = netif->ifp;
653 
654 		if ((m->m_pkthdr.len = txreq.size) > PKT_PROT_LEN) {
655 			struct mbuf *n;
656 			MGET(n, M_DONTWAIT, MT_DATA);
657 			if (!(m->m_next = n)) {
658 				m_freem(m);
659 				WPRINTF("Failed to allocate second mbuf\n");
660 				make_tx_response(netif, txreq.id, NETIF_RSP_ERROR);
661 				netif_put(netif);
662 				break;
663 			}
664 			n->m_len = txreq.size - PKT_PROT_LEN;
665 			m->m_len = PKT_PROT_LEN;
666 		} else
667 			m->m_len = txreq.size;
668 
669 		mop->host_addr = MMAP_VADDR(pending_idx);
670 		mop->dom       = netif->domid;
671 		mop->ref       = txreq.gref;
672 		mop->flags     = GNTMAP_host_map | GNTMAP_readonly;
673 		mop++;
674 
675 		memcpy(&pending_tx_info[pending_idx].req,
676 		       &txreq, sizeof(txreq));
677 		pending_tx_info[pending_idx].netif = netif;
678 		*((uint16_t *)m->m_data) = pending_idx;
679 
680 		if (txq_last)
681 			txq_last->m_nextpkt = m;
682 		else
683 			txq = m;
684 		txq_last = m;
685 
686 		pending_cons++;
687 
688 		if ((mop - tx_map_ops) >= ARRAY_SIZE(tx_map_ops))
689 			break;
690 	}
691 
692 	if (!txq)
693 		return;
694 
695 	ret = HYPERVISOR_grant_table_op(
696 		GNTTABOP_map_grant_ref, tx_map_ops, mop - tx_map_ops);
697 	BUG_ON(ret);
698 
699 	mop = tx_map_ops;
700 	while ((m = txq) != NULL) {
701 		caddr_t data;
702 
703 		txq = m->m_nextpkt;
704 		m->m_nextpkt = NULL;
705 
706 		pending_idx = *((uint16_t *)m->m_data);
707 		netif       = pending_tx_info[pending_idx].netif;
708 		memcpy(&txreq, &pending_tx_info[pending_idx].req, sizeof(txreq));
709 
710 		/* Check the remap error code. */
711 		if (unlikely(mop->status)) {
712 			WPRINTF("#### netback grant fails\n");
713 			make_tx_response(netif, txreq.id, NETIF_RSP_ERROR);
714 			netif_put(netif);
715 			m_freem(m);
716 			mop++;
717 			pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
718 			continue;
719 		}
720 
721 #if 0
722 		/* Can't do this in FreeBSD since vtophys() returns the pfn */
723 		/* of the remote domain who loaned us the machine page - DPT */
724 		xen_phys_machine[(vtophys(MMAP_VADDR(pending_idx)) >> PAGE_SHIFT)] =
725 			mop->dev_bus_addr >> PAGE_SHIFT;
726 #endif
727 		grant_tx_handle[pending_idx] = mop->handle;
728 
729 		/* Setup data in mbuf (lengths are already set) */
730 		data = (caddr_t)(MMAP_VADDR(pending_idx)|txreq.offset);
731 		bcopy(data, m->m_data, m->m_len);
732 		if (m->m_next) {
733 			struct mbuf *n = m->m_next;
734 			MEXTADD(n, MMAP_VADDR(pending_idx), PAGE_SIZE, netif_page_release,
735 				(void *)(unsigned int)pending_idx, M_RDONLY, EXT_NET_DRV);
736 			n->m_data = &data[PKT_PROT_LEN];
737 		} else {
738 			/* Schedule a response immediately. */
739 			netif_idx_release(pending_idx);
740 		}
741 
742 		if ((txreq.flags & NETTXF_data_validated)) {
743 			/* Tell the stack the checksums are okay */
744 			m->m_pkthdr.csum_flags |=
745 				(CSUM_IP_CHECKED | CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
746 			m->m_pkthdr.csum_data = 0xffff;
747 		}
748 
749 		/* If necessary, tell the stack to compute the checksums if it forwards the packet */
750 		if ((txreq.flags & NETTXF_csum_blank)) {
751 			struct ether_header *eh = mtod(m, struct ether_header *);
752 			if (ntohs(eh->ether_type) == ETHERTYPE_IP) {
753 				struct ip *ip = (struct ip *)&m->m_data[14];
754 				if (ip->ip_p == IPPROTO_TCP)
755 					m->m_pkthdr.csum_flags |= CSUM_TCP;
756 				else if (ip->ip_p == IPPROTO_UDP)
757 					m->m_pkthdr.csum_flags |= CSUM_UDP;
758 			}
759 		}
760 
761 		netif->ifp->if_ibytes += m->m_pkthdr.len;
762 		netif->ifp->if_ipackets++;
763 
764 		DDPRINTF("RECV %d bytes from %s (cflags=%x)\n",
765 			m->m_pkthdr.len, IFNAME(netif), m->m_pkthdr.csum_flags);
766 		DPRINTF_MBUF_LEN(m, 128);
767 
768 		(*netif->ifp->if_input)(netif->ifp, m);
769 
770 		mop++;
771 	}
772 }
773 
774 /* Handle interrupt from a frontend */
775 static void
776 netback_intr(void *arg)
777 {
778 	netif_t *netif = arg;
779 	DDPRINTF("%s\n", IFNAME(netif));
780 	add_to_tx_schedule_list_tail(netif);
781 	maybe_schedule_tx_action();
782 }
783 
784 /* Removes netif from front of list and does not call netif_put() (caller must) */
785 static netif_t *
786 remove_from_rx_schedule_list(void)
787 {
788 	netif_t *netif;
789 
790 	mtx_lock(&rx_sched_list_lock);
791 
792 	if ((netif = STAILQ_FIRST(&rx_sched_list))) {
793 		STAILQ_REMOVE(&rx_sched_list, netif, netback_info, next_rx);
794 		STAILQ_NEXT(netif, next_rx) = NULL;
795 		netif->on_rx_sched_list = 0;
796 	}
797 
798 	mtx_unlock(&rx_sched_list_lock);
799 
800 	return netif;
801 }
802 
803 /* Adds netif to end of list and calls netif_get() */
804 static void
805 add_to_rx_schedule_list_tail(netif_t *netif)
806 {
807 	if (netif->on_rx_sched_list)
808 		return;
809 
810 	mtx_lock(&rx_sched_list_lock);
811 	if (!netif->on_rx_sched_list && (netif->ifp->if_drv_flags & IFF_DRV_RUNNING)) {
812 		netif_get(netif);
813 		STAILQ_INSERT_TAIL(&rx_sched_list, netif, next_rx);
814 		netif->on_rx_sched_list = 1;
815 	}
816 	mtx_unlock(&rx_sched_list_lock);
817 }
818 
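/* Post an RX response on the frontend's ring; returns nonzero if it should be notified. */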
819 static int
820 make_rx_response(netif_t *netif, uint16_t id, int8_t st,
821 				 uint16_t offset, uint16_t size, uint16_t flags)
822 {
823 	RING_IDX i = netif->rx.rsp_prod_pvt;
824 	netif_rx_response_t *resp;
825 	int notify;
826 
827 	resp = RING_GET_RESPONSE(&netif->rx, i);
828 	resp->offset     = offset;
829 	resp->flags      = flags;
830 	resp->id         = id;
831 	resp->status     = (int16_t)size;
832 	if (st < 0)
833 		resp->status = (int16_t)st;
834 
835 	DDPRINTF("rx resp(%d): off=%x fl=%x id=%x stat=%d\n",
836 		i, resp->offset, resp->flags, resp->id, resp->status);
837 
838 	netif->rx.rsp_prod_pvt = ++i;
839 	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->rx, notify);
840 
841 	return notify;
842 }
843 
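/*
 * Push packets queued on the vif's send queue to the frontend using page
 * flipping: each packet's data page is replaced with a freshly allocated
 * machine frame and the original frame is handed over with
 * GNTTABOP_transfer.  Returns the number of packets dequeued.
 */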
844 static int
845 netif_rx(netif_t *netif)
846 {
847 	struct ifnet *ifp = netif->ifp;
848 	struct mbuf *m;
849 	multicall_entry_t *mcl;
850 	mmu_update_t *mmu;
851 	gnttab_transfer_t *gop;
852 	unsigned long vdata, old_mfn, new_mfn;
853 	struct mbuf *rxq = NULL, *rxq_last = NULL;
854 	int ret, notify = 0, pkts_dequeued = 0;
855 
856 	DDPRINTF("%s\n", IFNAME(netif));
857 
858 	mcl = rx_mcl;
859 	mmu = rx_mmu;
860 	gop = grant_rx_op;
861 
862 	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
863 
864 		/* Quit if the target domain has no receive buffers */
865 		if (netif->rx.req_cons == netif->rx.sring->req_prod)
866 			break;
867 
868 		IFQ_DRV_DEQUEUE(&ifp->if_snd, m);
869 		if (m == NULL)
870 			break;
871 
872 		pkts_dequeued++;
873 
874 		/* Check if we need to copy the data */
875 		if (((m->m_flags & (M_RDONLY|M_EXT)) != M_EXT) ||
876 			(*m->m_ext.ref_cnt > 1) || m->m_next != NULL) {
877 			struct mbuf *n;
878 
879 			DDPRINTF("copying mbuf (fl=%x ext=%x rc=%d n=%x)\n",
880 				m->m_flags,
881 				(m->m_flags & M_EXT) ? m->m_ext.ext_type : 0,
882 				(m->m_flags & M_EXT) ? *m->m_ext.ref_cnt : 0,
883 				(unsigned int)m->m_next);
884 
885 			/* Make copy */
886 			MGETHDR(n, M_DONTWAIT, MT_DATA);
887 			if (!n)
888 				goto drop;
889 
890 			MCLGET(n, M_DONTWAIT);
891 			if (!(n->m_flags & M_EXT)) {
892 				m_freem(n);
893 				goto drop;
894 			}
895 
896 			/* Leave space at front and keep current alignment */
897 			n->m_data += 16 + ((unsigned int)m->m_data & 0x3);
898 
899 			if (m->m_pkthdr.len > M_TRAILINGSPACE(n)) {
900 				WPRINTF("pkt too big %d\n", m->m_pkthdr.len);
901 				m_freem(n);
902 				goto drop;
903 			}
904 			m_copydata(m, 0, m->m_pkthdr.len, n->m_data);
905 			n->m_pkthdr.len = n->m_len = m->m_pkthdr.len;
906 			n->m_pkthdr.csum_flags = (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA);
907 			m_freem(m);
908 			m = n;
909 		}
910 
911 		vdata = (unsigned long)m->m_data;
912 		old_mfn = vtomach(vdata) >> PAGE_SHIFT;
913 
914 		if ((new_mfn = alloc_mfn()) == 0)
915 			goto drop;
916 
917 #ifdef XEN_NETBACK_FIXUP_CSUM
918 		/* Check if we need to compute a checksum.  This happens */
919 		/* when bridging from one domain to another. */
920 		if ((m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) ||
921 			(m->m_pkthdr.csum_flags & CSUM_SCTP))
922 			fixup_checksum(m);
923 #endif
924 
925 		xen_phys_machine[(vtophys(vdata) >> PAGE_SHIFT)] = new_mfn;
926 
927 		mcl->op = __HYPERVISOR_update_va_mapping;
928 		mcl->args[0] = vdata;
929 		mcl->args[1] = (new_mfn << PAGE_SHIFT) | PG_V | PG_RW | PG_M | PG_A;
930 		mcl->args[2] = 0;
931 		mcl->args[3] = 0;
932 		mcl++;
933 
934 		gop->mfn = old_mfn;
935 		gop->domid = netif->domid;
936 		gop->ref = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons)->gref;
937 		netif->rx.req_cons++;
938 		gop++;
939 
940 		mmu->ptr = (new_mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE;
941 		mmu->val = vtophys(vdata) >> PAGE_SHIFT;
942 		mmu++;
943 
944 		if (rxq_last)
945 			rxq_last->m_nextpkt = m;
946 		else
947 			rxq = m;
948 		rxq_last = m;
949 
950 		DDPRINTF("XMIT %d bytes to %s\n", m->m_pkthdr.len, IFNAME(netif));
951 		DPRINTF_MBUF_LEN(m, 128);
952 
953 		/* Filled the batch queue? */
954 		if ((gop - grant_rx_op) == ARRAY_SIZE(grant_rx_op))
955 			break;
956 
957 		continue;
958 	drop:
959 		DDPRINTF("dropping pkt\n");
960 		ifp->if_oerrors++;
961 		m_freem(m);
962 	}
963 
964 	if (mcl == rx_mcl)
965 		return pkts_dequeued;
966 
967 	mcl->op = __HYPERVISOR_mmu_update;
968 	mcl->args[0] = (unsigned long)rx_mmu;
969 	mcl->args[1] = mmu - rx_mmu;
970 	mcl->args[2] = 0;
971 	mcl->args[3] = DOMID_SELF;
972 	mcl++;
973 
974 	mcl[-2].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_ALL;
975 	ret = HYPERVISOR_multicall(rx_mcl, mcl - rx_mcl);
976 	BUG_ON(ret != 0);
977 
978 	ret = HYPERVISOR_grant_table_op(GNTTABOP_transfer, grant_rx_op, gop - grant_rx_op);
979 	BUG_ON(ret != 0);
980 
981 	mcl = rx_mcl;
982 	gop = grant_rx_op;
983 
984 	while ((m = rxq) != NULL) {
985 		int8_t status;
986 		uint16_t id, flags = 0;
987 
988 		rxq = m->m_nextpkt;
989 		m->m_nextpkt = NULL;
990 
991 		/* Rederive the machine addresses. */
992 		new_mfn = mcl->args[1] >> PAGE_SHIFT;
993 		old_mfn = gop->mfn;
994 
995 		ifp->if_obytes += m->m_pkthdr.len;
996 		ifp->if_opackets++;
997 
998 		/* The update_va_mapping() must not fail. */
999 		BUG_ON(mcl->result != 0);
1000 
1001 		/* Setup flags */
1002 		if ((m->m_pkthdr.csum_flags & CSUM_DELAY_DATA))
1003 			flags |= NETRXF_csum_blank | NETRXF_data_validated;
1004 		else if ((m->m_pkthdr.csum_flags & CSUM_DATA_VALID))
1005 			flags |= NETRXF_data_validated;
1006 
1007 		/* Check the reassignment error code. */
1008 		status = NETIF_RSP_OKAY;
1009 		if (gop->status != 0) {
1010 			DPRINTF("Bad status %d from grant transfer to DOM%u\n",
1011 				gop->status, netif->domid);
1012 			/*
1013 			 * Page no longer belongs to us unless GNTST_bad_page,
1014 			 * but that should be a fatal error anyway.
1015 			 */
1016 			BUG_ON(gop->status == GNTST_bad_page);
1017 			status = NETIF_RSP_ERROR;
1018 		}
1019 		id = RING_GET_REQUEST(&netif->rx, netif->rx.rsp_prod_pvt)->id;
1020 		notify |= make_rx_response(netif, id, status,
1021 					(unsigned long)m->m_data & PAGE_MASK,
1022 					m->m_pkthdr.len, flags);
1023 
1024 		m_freem(m);
1025 		mcl++;
1026 		gop++;
1027 	}
1028 
1029 	if (notify)
1030 		notify_remote_via_irq(netif->irq);
1031 
1032 	return pkts_dequeued;
1033 }
1034 
1035 static void
1036 rx_task_timer(void *arg)
1037 {
1038 	DDPRINTF("\n");
1039 	taskqueue_enqueue(taskqueue_swi, &net_rx_task);
1040 }
1041 
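/*
 * RX task: service every netif on the rx schedule list.  If the list wraps
 * around to a netif that made no progress (e.g. the frontend is out of
 * receive buffers), back off and retry from a one-tick callout instead of
 * spinning.
 */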
1042 static void
1043 net_rx_action(void *context, int pending)
1044 {
1045 	netif_t *netif, *last_zero_work = NULL;
1046 
1047 	DDPRINTF("\n");
1048 
1049 	while ((netif = remove_from_rx_schedule_list())) {
1050 		struct ifnet *ifp = netif->ifp;
1051 
1052 		if (netif == last_zero_work) {
1053 			if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1054 				add_to_rx_schedule_list_tail(netif);
1055 			netif_put(netif);
1056 			if (!STAILQ_EMPTY(&rx_sched_list))
1057 				callout_reset(&rx_task_callout, 1, rx_task_timer, NULL);
1058 			break;
1059 		}
1060 
1061 		if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
1062 			if (netif_rx(netif))
1063 				last_zero_work = NULL;
1064 			else if (!last_zero_work)
1065 				last_zero_work = netif;
1066 			if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1067 				add_to_rx_schedule_list_tail(netif);
1068 		}
1069 
1070 		netif_put(netif);
1071 	}
1072 }
1073 
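/* if_start handler: queue the vif for RX servicing and kick the RX task. */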
1074 static void
1075 netback_start(struct ifnet *ifp)
1076 {
1077 	netif_t *netif = (netif_t *)ifp->if_softc;
1078 
1079 	DDPRINTF("%s\n", IFNAME(netif));
1080 
1081 	add_to_rx_schedule_list_tail(netif);
1082 	taskqueue_enqueue(taskqueue_swi, &net_rx_task);
1083 }
1084 
1085 /* Map a grant ref to a ring */
1086 static int
1087 map_ring(grant_ref_t ref, domid_t dom, struct ring_ref *ring)
1088 {
1089 	struct gnttab_map_grant_ref op;
1090 
1091 	ring->va = kmem_alloc_nofault(kernel_map, PAGE_SIZE);
1092 	if (ring->va == 0)
1093 		return ENOMEM;
1094 
1095 	op.host_addr = ring->va;
1096 	op.flags = GNTMAP_host_map;
1097 	op.ref = ref;
1098 	op.dom = dom;
1099 	HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1);
1100 	if (op.status) {
1101 		WPRINTF("grant table op err=%d\n", op.status);
1102 		kmem_free(kernel_map, ring->va, PAGE_SIZE);
1103 		ring->va = 0;
1104 		return EACCES;
1105 	}
1106 
1107 	ring->handle = op.handle;
1108 	ring->bus_addr = op.dev_bus_addr;
1109 
1110 	return 0;
1111 }
1112 
1113 /* Unmap grant ref for a ring */
1114 static void
1115 unmap_ring(struct ring_ref *ring)
1116 {
1117 	struct gnttab_unmap_grant_ref op;
1118 
1119 	op.host_addr = ring->va;
1120 	op.dev_bus_addr = ring->bus_addr;
1121 	op.handle = ring->handle;
1122 	HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1);
1123 	if (op.status)
1124 		WPRINTF("grant table op err=%d\n", op.status);
1125 
1126 	kmem_free(kernel_map, ring->va, PAGE_SIZE);
1127 	ring->va = 0;
1128 }
1129 
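/*
 * Read the ring grant references and event channel the frontend published
 * in xenstore, for example:
 *
 *   tx-ring-ref = "768"
 *   rx-ring-ref = "769"
 *   event-channel = "10"
 *
 * then map both shared rings and bind the interdomain event channel to
 * netback_intr().
 */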
1130 static int
1131 connect_rings(netif_t *netif)
1132 {
1133 	struct xenbus_device *xdev = netif->xdev;
1134 	netif_tx_sring_t *txs;
1135 	netif_rx_sring_t *rxs;
1136 	unsigned long tx_ring_ref, rx_ring_ref;
1137 	evtchn_port_t evtchn;
1138 	evtchn_op_t op = { .cmd = EVTCHNOP_bind_interdomain };
1139 	int err;
1140 
1141 	/* Grab frontend data and map its memory */
1142 	err = xenbus_gather(NULL, xdev->otherend,
1143 			"tx-ring-ref", "%lu", &tx_ring_ref,
1144 		    "rx-ring-ref", "%lu", &rx_ring_ref,
1145 		    "event-channel", "%u", &evtchn, NULL);
1146 	if (err) {
1147 		xenbus_dev_fatal(xdev, err,
1148 			"reading %s/ring-ref and event-channel",
1149 			xdev->otherend);
1150 		return err;
1151 	}
1152 
1153 	err = map_ring(tx_ring_ref, netif->domid, &netif->tx_ring_ref);
1154 	if (err) {
1155 		xenbus_dev_fatal(xdev, err, "mapping tx ring");
1156 		return err;
1157 	}
1158 	txs = (netif_tx_sring_t *)netif->tx_ring_ref.va;
1159 	BACK_RING_INIT(&netif->tx, txs, PAGE_SIZE);
1160 
1161 	err = map_ring(rx_ring_ref, netif->domid, &netif->rx_ring_ref);
1162 	if (err) {
1163 		unmap_ring(&netif->tx_ring_ref);
1164 		xenbus_dev_fatal(xdev, err, "mapping rx ring");
1165 		return err;
1166 	}
1167 	rxs = (netif_rx_sring_t *)netif->rx_ring_ref.va;
1168 	BACK_RING_INIT(&netif->rx, rxs, PAGE_SIZE);
1169 
1170 	op.u.bind_interdomain.remote_dom = netif->domid;
1171 	op.u.bind_interdomain.remote_port = evtchn;
1172 	err = HYPERVISOR_event_channel_op(&op);
1173 	if (err) {
1174 		unmap_ring(&netif->tx_ring_ref);
1175 		unmap_ring(&netif->rx_ring_ref);
1176 		xenbus_dev_fatal(xdev, err, "binding event channel");
1177 		return err;
1178 	}
1179 	netif->evtchn = op.u.bind_interdomain.local_port;
1180 
1181 	/* bind evtchn to irq handler */
1182 	netif->irq =
1183 		bind_evtchn_to_irqhandler(netif->evtchn, "netback",
1184 			netback_intr, netif, INTR_TYPE_NET|INTR_MPSAFE, &netif->irq_cookie);
1185 
1186 	netif->rings_connected = 1;
1187 
1188 	DPRINTF("%s connected! evtchn=%d irq=%d\n",
1189 		IFNAME(netif), netif->evtchn, netif->irq);
1190 
1191 	return 0;
1192 }
1193 
1194 static void
1195 disconnect_rings(netif_t *netif)
1196 {
1197 	DPRINTF("\n");
1198 
1199 	if (netif->rings_connected) {
1200 		unbind_from_irqhandler(netif->irq, netif->irq_cookie);
1201 		netif->irq = 0;
1202 		unmap_ring(&netif->tx_ring_ref);
1203 		unmap_ring(&netif->rx_ring_ref);
1204 		netif->rings_connected = 0;
1205 	}
1206 }
1207 
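/*
 * Bring the interface up once the vif device is attached and the frontend
 * has announced itself Connected.
 */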
1208 static void
1209 connect(netif_t *netif)
1210 {
1211 	if (!netif->xdev ||
1212 		!netif->attached ||
1213 		netif->frontend_state != XenbusStateConnected) {
1214 		return;
1215 	}
1216 
1217 	if (!connect_rings(netif)) {
1218 		xenbus_switch_state(netif->xdev, NULL, XenbusStateConnected);
1219 
1220 		/* Turn on interface */
1221 		netif->ifp->if_drv_flags |= IFF_DRV_RUNNING;
1222 		netif->ifp->if_flags |= IFF_UP;
1223 	}
1224 }
1225 
1226 static int
1227 netback_remove(struct xenbus_device *xdev)
1228 {
1229 	netif_t *netif = xdev->data;
1230 	device_t ndev;
1231 
1232 	DPRINTF("remove %s\n", xdev->nodename);
1233 
1234 	if ((ndev = netif->ndev)) {
1235 		netif->ndev = NULL;
1236 		mtx_lock(&Giant);
1237 		device_detach(ndev);
1238 		mtx_unlock(&Giant);
1239 	}
1240 
1241 	xdev->data = NULL;
1242 	netif->xdev = NULL;
1243 	netif_put(netif);
1244 
1245 	return 0;
1246 }
1247 
1248 /**
1249  * Entry point to this code when a new device is created.  Allocate the basic
1250  * netif structures, create the vif newbus device, and switch to the
1251  * InitWait state; the rings are mapped later, when the frontend connects.
1252  */
1253 static int
1254 netback_probe(struct xenbus_device *xdev, const struct xenbus_device_id *id)
1255 {
1256 	int err;
1257 	long handle;
1258 	char *bridge;
1259 
1260 	DPRINTF("node=%s\n", xdev->nodename);
1261 
1262 	/* Grab the handle */
1263 	err = xenbus_scanf(NULL, xdev->nodename, "handle", "%li", &handle);
1264 	if (err != 1) {
1265 		xenbus_dev_fatal(xdev, err, "reading handle");
1266 		return err;
1267 	}
1268 
1269 	/* Check for bridge */
1270 	bridge = xenbus_read(NULL, xdev->nodename, "bridge", NULL);
1271 	if (IS_ERR(bridge))
1272 		bridge = NULL;
1273 
1274 	err = xenbus_switch_state(xdev, NULL, XenbusStateInitWait);
1275 	if (err) {
1276 		xenbus_dev_fatal(xdev, err, "writing switch state");
1277 		return err;
1278 	}
1279 
1280 	err = netif_create(handle, xdev, bridge);
1281 	if (err) {
1282 		xenbus_dev_fatal(xdev, err, "creating netif");
1283 		return err;
1284 	}
1285 
1286 	err = vif_add_dev(xdev);
1287 	if (err) {
1288 		netif_put((netif_t *)xdev->data);
1289 		xenbus_dev_fatal(xdev, err, "adding vif device");
1290 		return err;
1291 	}
1292 
1293 	return 0;
1294 }
1295 
1296 /**
1297  * Called when the backend driver is resumed after a suspend.  The netif and
1298  * device-layer structures are left intact, so nothing needs to be done here.
1301  */
1302 static int netback_resume(struct xenbus_device *xdev)
1303 {
1304 	DPRINTF("node=%s\n", xdev->nodename);
1305 	return 0;
1306 }
1307 
1308 
1309 /**
1310  * Callback received when the frontend's state changes.
1311  */
1312 static void frontend_changed(struct xenbus_device *xdev,
1313 							 XenbusState frontend_state)
1314 {
1315 	netif_t *netif = xdev->data;
1316 
1317 	DPRINTF("state=%d\n", frontend_state);
1318 
1319 	netif->frontend_state = frontend_state;
1320 
1321 	switch (frontend_state) {
1322 	case XenbusStateInitialising:
1323 	case XenbusStateInitialised:
1324 		break;
1325 	case XenbusStateConnected:
1326 		connect(netif);
1327 		break;
1328 	case XenbusStateClosing:
1329 		xenbus_switch_state(xdev, NULL, XenbusStateClosing);
1330 		break;
1331 	case XenbusStateClosed:
1332 		xenbus_remove_device(xdev);
1333 		break;
1334 	case XenbusStateUnknown:
1335 	case XenbusStateInitWait:
1336 		xenbus_dev_fatal(xdev, EINVAL, "saw state %d at frontend",
1337 						 frontend_state);
1338 		break;
1339 	}
1340 }
1341 
1342 /* ** Driver registration ** */
1343 
1344 static struct xenbus_device_id netback_ids[] = {
1345 	{ "vif" },
1346 	{ "" }
1347 };
1348 
1349 static struct xenbus_driver netback = {
1350 	.name = "netback",
1351 	.ids = netback_ids,
1352 	.probe = netback_probe,
1353 	.remove = netback_remove,
1354 	.resume= netback_resume,
1355 	.otherend_changed = frontend_changed,
1356 };
1357 
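/*
 * Driver-wide initialization: reserve the VA range used to map foreign TX
 * pages, seed the pending ring, set up the TX/RX tasks and scheduling
 * locks, and register the backend with xenbus.
 */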
1358 static void
1359 netback_init(void *unused)
1360 {
1361 	callout_init(&rx_task_callout, CALLOUT_MPSAFE);
1362 
1363 	mmap_vstart = alloc_empty_page_range(MAX_PENDING_REQS);
1364 	BUG_ON(!mmap_vstart);
1365 
1366 	pending_cons = 0;
1367 	for (pending_prod = 0; pending_prod < MAX_PENDING_REQS; pending_prod++)
1368 		pending_ring[pending_prod] = pending_prod;
1369 
1370 	TASK_INIT(&net_tx_task, 0, net_tx_action, NULL);
1371 	TASK_INIT(&net_rx_task, 0, net_rx_action, NULL);
1372 	mtx_init(&tx_sched_list_lock, "nb_tx_sched_lock", "netback tx sched lock", MTX_DEF);
1373 	mtx_init(&rx_sched_list_lock, "nb_rx_sched_lock", "netback rx sched lock", MTX_DEF);
1374 
1375 	DPRINTF("registering %s\n", netback.name);
1376 
1377 	xenbus_register_backend(&netback);
1378 }
1379 
1380 SYSINIT(xnbedev, SI_SUB_PSEUDO, SI_ORDER_ANY, netback_init, NULL)
1381 
1382 static int
1383 vif_add_dev(struct xenbus_device *xdev)
1384 {
1385 	netif_t *netif = xdev->data;
1386 	device_t nexus, ndev;
1387 	devclass_t dc;
1388 	int err = 0;
1389 
1390 	mtx_lock(&Giant);
1391 
1392 	/* We will add a vif device as a child of nexus0 (for now) */
1393 	if (!(dc = devclass_find("nexus")) ||
1394 		!(nexus = devclass_get_device(dc, 0))) {
1395 		WPRINTF("could not find nexus0!\n");
1396 		err = ENOENT;
1397 		goto done;
1398 	}
1399 
1400 
1401 	/* Create a newbus device representing the vif */
1402 	ndev = BUS_ADD_CHILD(nexus, 0, "vif", netif->ifp->if_dunit);
1403 	if (!ndev) {
1404 		WPRINTF("could not create newbus device %s!\n", IFNAME(netif));
1405 		err = EFAULT;
1406 		goto done;
1407 	}
1408 
1409 	netif_get(netif);
1410 	device_set_ivars(ndev, netif);
1411 	netif->ndev = ndev;
1412 
1413 	device_probe_and_attach(ndev);
1414 
1415  done:
1416 
1417 	mtx_unlock(&Giant);
1418 
1419 	return err;
1420 }
1421 
1422 enum {
1423 	VIF_SYSCTL_DOMID,
1424 	VIF_SYSCTL_HANDLE,
1425 	VIF_SYSCTL_TXRING,
1426 	VIF_SYSCTL_RXRING,
1427 };
1428 
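/*
 * Format the TX or RX ring indexes into a malloc'd string for the debug
 * sysctls; the caller frees the buffer.
 */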
1429 static char *
1430 vif_sysctl_ring_info(netif_t *netif, int cmd)
1431 {
1432 	char *buf = malloc(256, M_DEVBUF, M_WAITOK);
1433 	if (buf) {
1434 		if (!netif->rings_connected)
1435 			sprintf(buf, "rings not connected\n");
1436 		else if (cmd == VIF_SYSCTL_TXRING) {
1437 			netif_tx_back_ring_t *tx = &netif->tx;
1438 			sprintf(buf, "nr_ents=%x req_cons=%x"
1439 					" req_prod=%x req_event=%x"
1440 					" rsp_prod=%x rsp_event=%x",
1441 					tx->nr_ents, tx->req_cons,
1442 					tx->sring->req_prod, tx->sring->req_event,
1443 					tx->sring->rsp_prod, tx->sring->rsp_event);
1444 		} else {
1445 			netif_rx_back_ring_t *rx = &netif->rx;
1446 			sprintf(buf, "nr_ents=%x req_cons=%x"
1447 					" req_prod=%x req_event=%x"
1448 					" rsp_prod=%x rsp_event=%x",
1449 					rx->nr_ents, rx->req_cons,
1450 					rx->sring->req_prod, rx->sring->req_event,
1451 					rx->sring->rsp_prod, rx->sring->rsp_event);
1452 		}
1453 	}
1454 	return buf;
1455 }
1456 
1457 static int
1458 vif_sysctl_handler(SYSCTL_HANDLER_ARGS)
1459 {
1460 	device_t dev = (device_t)arg1;
1461 	netif_t *netif = (netif_t *)device_get_ivars(dev);
1462 	const char *value;
1463 	char *buf = NULL;
1464 	int err;
1465 
1466 	switch (arg2) {
1467 	case VIF_SYSCTL_DOMID:
1468 		return sysctl_handle_int(oidp, NULL, netif->domid, req);
1469 	case VIF_SYSCTL_HANDLE:
1470 		return sysctl_handle_int(oidp, NULL, netif->handle, req);
1471 	case VIF_SYSCTL_TXRING:
1472 	case VIF_SYSCTL_RXRING:
1473 		value = buf = vif_sysctl_ring_info(netif, arg2);
1474 		break;
1475 	default:
1476 		return (EINVAL);
1477 	}
1478 
1479 	err = SYSCTL_OUT(req, value, strlen(value));
1480 	if (buf != NULL)
1481 		free(buf, M_DEVBUF);
1482 
1483 	return err;
1484 }
1485 
1486 /* Newbus vif device driver probe */
1487 static int
1488 vif_probe(device_t dev)
1489 {
1490 	DDPRINTF("vif%d\n", device_get_unit(dev));
1491 	return 0;
1492 }
1493 
1494 /* Newbus vif device driver attach */
1495 static int
1496 vif_attach(device_t dev)
1497 {
1498 	netif_t *netif = (netif_t *)device_get_ivars(dev);
1499 	uint8_t mac[ETHER_ADDR_LEN];
1500 
1501 	DDPRINTF("%s\n", IFNAME(netif));
1502 
1503 	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
1504 	    OID_AUTO, "domid", CTLTYPE_INT|CTLFLAG_RD,
1505 	    dev, VIF_SYSCTL_DOMID, vif_sysctl_handler, "I",
1506 	    "domid of frontend");
1507 	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
1508 	    OID_AUTO, "handle", CTLTYPE_INT|CTLFLAG_RD,
1509 	    dev, VIF_SYSCTL_HANDLE, vif_sysctl_handler, "I",
1510 	    "handle of frontend");
1511 #ifdef XEN_NETBACK_DEBUG
1512 	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
1513 	    OID_AUTO, "txring", CTLTYPE_STRING | CTLFLAG_RD,
1514 	    dev, VIF_SYSCTL_TXRING, vif_sysctl_handler, "A",
1515 	    "tx ring info");
1516 	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
1517 	    OID_AUTO, "rxring", CTLTYPE_STRING | CTLFLAG_RD,
1518 	    dev, VIF_SYSCTL_RXRING, vif_sysctl_handler, "A",
1519 	    "rx ring info");
1520 #endif
1521 
1522 	memset(mac, 0xff, sizeof(mac));
1523 	mac[0] &= ~0x01;
1524 
1525 	ether_ifattach(netif->ifp, mac);
1526 	netif->attached = 1;
1527 
1528 	connect(netif);
1529 
1530 	if (netif->bridge) {
1531 		DPRINTF("Adding %s to bridge %s\n", IFNAME(netif), netif->bridge);
1532 		int err = add_to_bridge(netif->ifp, netif->bridge);
1533 		if (err) {
1534 			WPRINTF("Error adding %s to %s; err=%d\n",
1535 				IFNAME(netif), netif->bridge, err);
1536 		}
1537 	}
1538 
1539 	return bus_generic_attach(dev);
1540 }
1541 
1542 /* Newbus vif device driver detach */
1543 static int
1544 vif_detach(device_t dev)
1545 {
1546 	netif_t *netif = (netif_t *)device_get_ivars(dev);
1547 	struct ifnet *ifp = netif->ifp;
1548 
1549 	DDPRINTF("%s\n", IFNAME(netif));
1550 
1551 	/* Tell the stack that the interface is no longer active */
1552 	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
1553 
1554 	ether_ifdetach(ifp);
1555 
1556 	bus_generic_detach(dev);
1557 
1558 	netif->attached = 0;
1559 
1560 	netif_put(netif);
1561 
1562 	return 0;
1563 }
1564 
1565 static device_method_t vif_methods[] = {
1566 	/* Device interface */
1567 	DEVMETHOD(device_probe,		vif_probe),
1568 	DEVMETHOD(device_attach, 	vif_attach),
1569 	DEVMETHOD(device_detach,	vif_detach),
1570 	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
1571 	DEVMETHOD(device_suspend,	bus_generic_suspend),
1572 	DEVMETHOD(device_resume,	bus_generic_resume),
1573 	{0, 0}
1574 };
1575 
1576 static devclass_t vif_devclass;
1577 
1578 static driver_t vif_driver = {
1579 	"vif",
1580 	vif_methods,
1581 	0,
1582 };
1583 
1584 DRIVER_MODULE(vif, nexus, vif_driver, vif_devclass, 0, 0);
1585 
1586 
1587 /*
1588  * Local variables:
1589  * mode: C
1590  * c-set-style: "BSD"
1591  * c-basic-offset: 4
1592  * tab-width: 4
1593  * indent-tabs-mode: t
1594  * End:
1595  */
1596