/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#ifdef DEBUG
#define	XNB_DEBUG 1
#endif /* DEBUG */

#include "xnb.h"

#include <sys/sunddi.h>
#include <sys/sunndi.h>
#include <sys/modctl.h>
#include <sys/conf.h>
#include <sys/mac.h>
#include <sys/mac_impl.h> /* For mac_fix_cksum(). */
#include <sys/dlpi.h>
#include <sys/strsubr.h>
#include <sys/strsun.h>
#include <sys/types.h>
#include <sys/pattr.h>
#include <vm/seg_kmem.h>
#include <vm/hat_i86.h>
#include <xen/sys/xenbus_impl.h>
#include <xen/sys/xendev.h>
#include <sys/balloon_impl.h>
#include <sys/evtchn_impl.h>
#include <sys/gnttab.h>
#include <vm/vm_dep.h>
#include <sys/note.h>
#include <sys/gld.h>
#include <inet/ip.h>
#include <inet/ip_impl.h>

/*
 * The terms "transmit" and "receive" are used in alignment with domU,
 * which means that packets originating from the peer domU are "transmitted"
 * to other parts of the system and packets are "received" from them.
 */

/*
 * Should we allow guests to manipulate multicast group membership?
 */
static boolean_t	xnb_multicast_control = B_TRUE;

static boolean_t	xnb_connect_rings(dev_info_t *);
static void		xnb_disconnect_rings(dev_info_t *);
static void		xnb_oe_state_change(dev_info_t *, ddi_eventcookie_t,
    void *, void *);
static void		xnb_hp_state_change(dev_info_t *, ddi_eventcookie_t,
    void *, void *);

static int	xnb_txbuf_constructor(void *, void *, int);
static void	xnb_txbuf_destructor(void *, void *);
static void	xnb_tx_notify_peer(xnb_t *, boolean_t);
static void	xnb_tx_mark_complete(xnb_t *, RING_IDX, int16_t);

mblk_t		*xnb_to_peer(xnb_t *, mblk_t *);
mblk_t		*xnb_copy_to_peer(xnb_t *, mblk_t *);

static void		setup_gop(xnb_t *, gnttab_copy_t *, uchar_t *,
    size_t, size_t, size_t, grant_ref_t);
#pragma inline(setup_gop)
static boolean_t	is_foreign(void *);
#pragma inline(is_foreign)

#define	INVALID_GRANT_HANDLE	((grant_handle_t)-1)
#define	INVALID_GRANT_REF	((grant_ref_t)-1)

static kmutex_t	xnb_alloc_page_lock;

/*
 * On a 32-bit PAE system physical and machine addresses are larger
 * than 32 bits.  ddi_btop() on such systems takes an unsigned long
 * argument, and so addresses above 4G are truncated before ddi_btop()
 * gets to see them.  To avoid this, code the shift operation here.
 */
#define	xnb_btop(addr)	((addr) >> PAGESHIFT)
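
/*
 * For example (illustrative values only): with PAGESHIFT 12, a
 * machine address of 0x123456789 is truncated to 0x23456789 on its
 * way into ddi_btop(), giving page 0x23456, whereas
 * xnb_btop(0x123456789ULL) correctly yields page 0x123456.
 */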

/* DMA attributes for transmit and receive data */
static ddi_dma_attr_t buf_dma_attr = {
	DMA_ATTR_V0,		/* version of this structure */
	0,			/* lowest usable address */
	0xffffffffffffffffULL,	/* highest usable address */
	0x7fffffff,		/* maximum DMAable byte count */
	MMU_PAGESIZE,		/* alignment in bytes */
	0x7ff,			/* bitmap of burst sizes */
	1,			/* minimum transfer */
	0xffffffffU,		/* maximum transfer */
	0xffffffffffffffffULL,	/* maximum segment length */
	1,			/* maximum number of segments */
	1,			/* granularity */
	0,			/* flags (reserved) */
};
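
/*
 * With a single DMA segment, page alignment and page-sized
 * allocations (see xnb_txbuf_constructor()), each transmit buffer is
 * backed by exactly one machine page and so can be described by a
 * single MFN.
 */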

/* DMA access attributes for data: NOT to be byte swapped. */
static ddi_device_acc_attr_t data_accattr = {
	DDI_DEVICE_ATTR_V0,
	DDI_NEVERSWAP_ACC,
	DDI_STRICTORDER_ACC
};

/*
 * Statistics.
 */
static const char * const aux_statistics[] = {
	"rx_cksum_deferred",
	"tx_cksum_no_need",
	"rx_rsp_notok",
	"tx_notify_deferred",
	"tx_notify_sent",
	"rx_notify_deferred",
	"rx_notify_sent",
	"tx_too_early",
	"rx_too_early",
	"rx_allocb_failed",
	"tx_allocb_failed",
	"rx_foreign_page",
	"mac_full",
	"spurious_intr",
	"allocation_success",
	"allocation_failure",
	"small_allocation_success",
	"small_allocation_failure",
	"other_allocation_failure",
	"rx_pageboundary_crossed",
	"rx_cpoparea_grown",
	"csum_hardware",
	"csum_software",
	"tx_overflow_page",
	"tx_unexpected_flags",
};

static int
xnb_ks_aux_update(kstat_t *ksp, int flag)
{
	xnb_t *xnbp;
	kstat_named_t *knp;

	if (flag != KSTAT_READ)
		return (EACCES);

	xnbp = ksp->ks_private;
	knp = ksp->ks_data;

	/*
	 * Assignment order should match that of the names in
	 * aux_statistics.
	 */
	(knp++)->value.ui64 = xnbp->xnb_stat_rx_cksum_deferred;
	(knp++)->value.ui64 = xnbp->xnb_stat_tx_cksum_no_need;
	(knp++)->value.ui64 = xnbp->xnb_stat_rx_rsp_notok;
	(knp++)->value.ui64 = xnbp->xnb_stat_tx_notify_deferred;
	(knp++)->value.ui64 = xnbp->xnb_stat_tx_notify_sent;
	(knp++)->value.ui64 = xnbp->xnb_stat_rx_notify_deferred;
	(knp++)->value.ui64 = xnbp->xnb_stat_rx_notify_sent;
	(knp++)->value.ui64 = xnbp->xnb_stat_tx_too_early;
	(knp++)->value.ui64 = xnbp->xnb_stat_rx_too_early;
	(knp++)->value.ui64 = xnbp->xnb_stat_rx_allocb_failed;
	(knp++)->value.ui64 = xnbp->xnb_stat_tx_allocb_failed;
	(knp++)->value.ui64 = xnbp->xnb_stat_rx_foreign_page;
	(knp++)->value.ui64 = xnbp->xnb_stat_mac_full;
	(knp++)->value.ui64 = xnbp->xnb_stat_spurious_intr;
	(knp++)->value.ui64 = xnbp->xnb_stat_allocation_success;
	(knp++)->value.ui64 = xnbp->xnb_stat_allocation_failure;
	(knp++)->value.ui64 = xnbp->xnb_stat_small_allocation_success;
	(knp++)->value.ui64 = xnbp->xnb_stat_small_allocation_failure;
	(knp++)->value.ui64 = xnbp->xnb_stat_other_allocation_failure;
	(knp++)->value.ui64 = xnbp->xnb_stat_rx_pagebndry_crossed;
	(knp++)->value.ui64 = xnbp->xnb_stat_rx_cpoparea_grown;
	(knp++)->value.ui64 = xnbp->xnb_stat_csum_hardware;
	(knp++)->value.ui64 = xnbp->xnb_stat_csum_software;
	(knp++)->value.ui64 = xnbp->xnb_stat_tx_overflow_page;
	(knp++)->value.ui64 = xnbp->xnb_stat_tx_unexpected_flags;

	return (0);
}

static boolean_t
xnb_ks_init(xnb_t *xnbp)
{
	int nstat = sizeof (aux_statistics) /
	    sizeof (aux_statistics[0]);
	const char * const *cp = aux_statistics;
	kstat_named_t *knp;

	/*
	 * Create and initialise kstats.
	 */
	xnbp->xnb_kstat_aux = kstat_create(ddi_driver_name(xnbp->xnb_devinfo),
	    ddi_get_instance(xnbp->xnb_devinfo), "aux_statistics", "net",
	    KSTAT_TYPE_NAMED, nstat, 0);
	if (xnbp->xnb_kstat_aux == NULL)
		return (B_FALSE);

	xnbp->xnb_kstat_aux->ks_private = xnbp;
	xnbp->xnb_kstat_aux->ks_update = xnb_ks_aux_update;

	knp = xnbp->xnb_kstat_aux->ks_data;
	while (nstat > 0) {
		kstat_named_init(knp, *cp, KSTAT_DATA_UINT64);

		knp++;
		cp++;
		nstat--;
	}

	kstat_install(xnbp->xnb_kstat_aux);

	return (B_TRUE);
}

static void
xnb_ks_free(xnb_t *xnbp)
{
	kstat_delete(xnbp->xnb_kstat_aux);
}

/*
 * Calculate and insert the transport checksum for an arbitrary packet.
 */
static mblk_t *
xnb_software_csum(xnb_t *xnbp, mblk_t *mp)
{
	_NOTE(ARGUNUSED(xnbp));

	/*
	 * XXPV dme: shouldn't rely on mac_fix_cksum(), not least
	 * because it doesn't cover all of the interesting cases :-(
	 */
	mac_hcksum_set(mp, 0, 0, 0, 0, HCK_FULLCKSUM);

	return (mac_fix_cksum(mp));
}

mblk_t *
xnb_process_cksum_flags(xnb_t *xnbp, mblk_t *mp, uint32_t capab)
{
	struct ether_header *ehp;
	uint16_t sap;
	uint32_t offset;
	ipha_t *ipha;

	ASSERT(mp->b_next == NULL);

	/*
	 * Check that the packet is contained in a single mblk.  In
	 * the "from peer" path this is true today, but may change
	 * when scatter gather support is added.  In the "to peer"
	 * path we cannot be sure, but in most cases it will be true
	 * (in the xnbo case the packet has come from a MAC device
	 * which is unlikely to split packets).
	 */
	if (mp->b_cont != NULL)
		goto software;

	/*
	 * If the MAC has no hardware capability don't do any further
	 * checking.
	 */
	if (capab == 0)
		goto software;

	ASSERT(MBLKL(mp) >= sizeof (struct ether_header));
	ehp = (struct ether_header *)mp->b_rptr;

	if (ntohs(ehp->ether_type) == VLAN_TPID) {
		struct ether_vlan_header *evhp;

		ASSERT(MBLKL(mp) >= sizeof (struct ether_vlan_header));
		evhp = (struct ether_vlan_header *)mp->b_rptr;
		sap = ntohs(evhp->ether_type);
		offset = sizeof (struct ether_vlan_header);
	} else {
		sap = ntohs(ehp->ether_type);
		offset = sizeof (struct ether_header);
	}

	/*
	 * We only attempt to do IPv4 packets in hardware.
	 */
	if (sap != ETHERTYPE_IP)
		goto software;

	/*
	 * We know that this is an IPv4 packet.
	 */
	ipha = (ipha_t *)(mp->b_rptr + offset);

	switch (ipha->ipha_protocol) {
	case IPPROTO_TCP:
	case IPPROTO_UDP: {
		uint32_t start, length, stuff, cksum;
		uint16_t *stuffp;

		/*
		 * This is a TCP/IPv4 or UDP/IPv4 packet, for which we
		 * can use full IPv4 and partial checksum offload.
		 */
		if ((capab & (HCKSUM_INET_FULL_V4|HCKSUM_INET_PARTIAL)) == 0)
			break;

		start = IP_SIMPLE_HDR_LENGTH;
		length = ntohs(ipha->ipha_length);
		if (ipha->ipha_protocol == IPPROTO_TCP) {
			stuff = start + TCP_CHECKSUM_OFFSET;
			cksum = IP_TCP_CSUM_COMP;
		} else {
			stuff = start + UDP_CHECKSUM_OFFSET;
			cksum = IP_UDP_CSUM_COMP;
		}
		stuffp = (uint16_t *)(mp->b_rptr + offset + stuff);

		if (capab & HCKSUM_INET_FULL_V4) {
			/*
			 * Some devices require that the checksum
			 * field of the packet is zero for full
			 * offload.
			 */
			*stuffp = 0;

			mac_hcksum_set(mp, 0, 0, 0, 0, HCK_FULLCKSUM);

			xnbp->xnb_stat_csum_hardware++;

			return (mp);
		}

		if (capab & HCKSUM_INET_PARTIAL) {
			if (*stuffp == 0) {
				ipaddr_t src, dst;

				/*
				 * Older Solaris guests don't insert
				 * the pseudo-header checksum, so we
				 * calculate it here.
				 */
				src = ipha->ipha_src;
				dst = ipha->ipha_dst;

				cksum += (dst >> 16) + (dst & 0xFFFF);
				cksum += (src >> 16) + (src & 0xFFFF);
				cksum += length - IP_SIMPLE_HDR_LENGTH;

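				/*
				 * Fold the 32-bit sum down to 16
				 * bits twice: the first fold can
				 * itself carry into bit 16.
				 */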
				cksum = (cksum >> 16) + (cksum & 0xFFFF);
				cksum = (cksum >> 16) + (cksum & 0xFFFF);

				ASSERT(cksum <= 0xFFFF);

				*stuffp = (uint16_t)(cksum ? cksum : ~cksum);
			}

			mac_hcksum_set(mp, start, stuff, length, 0,
			    HCK_PARTIALCKSUM);

			xnbp->xnb_stat_csum_hardware++;

			return (mp);
		}

		/* NOTREACHED */
		break;
	}

	default:
		/* Use software. */
		break;
	}

software:
	/*
	 * We are not able to use any offload so do the whole thing in
	 * software.
	 */
	xnbp->xnb_stat_csum_software++;

	return (xnb_software_csum(xnbp, mp));
}
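
/*
 * A minimal sketch of a caller (hypothetical - the real callers are
 * the flavour-specific drivers built on xnb):
 *
 *	if ((txreq->flags & NETTXF_csum_blank) != 0)
 *		mp = xnb_process_cksum_flags(xnbp, mp, capab);
 *
 * where 'capab' carries the HCKSUM_* capabilities of the underlying
 * MAC; passing 0 forces the software path above.
 */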

int
xnb_attach(dev_info_t *dip, xnb_flavour_t *flavour, void *flavour_data)
{
	xnb_t *xnbp;
	char *xsname;
	char cachename[32];

	xnbp = kmem_zalloc(sizeof (*xnbp), KM_SLEEP);

	xnbp->xnb_flavour = flavour;
	xnbp->xnb_flavour_data = flavour_data;
	xnbp->xnb_devinfo = dip;
	xnbp->xnb_evtchn = INVALID_EVTCHN;
	xnbp->xnb_irq = B_FALSE;
	xnbp->xnb_tx_ring_handle = INVALID_GRANT_HANDLE;
	xnbp->xnb_rx_ring_handle = INVALID_GRANT_HANDLE;
	xnbp->xnb_connected = B_FALSE;
	xnbp->xnb_hotplugged = B_FALSE;
	xnbp->xnb_detachable = B_FALSE;
	xnbp->xnb_peer = xvdi_get_oeid(dip);
	xnbp->xnb_be_status = XNB_STATE_INIT;
	xnbp->xnb_fe_status = XNB_STATE_INIT;

	xnbp->xnb_tx_buf_count = 0;

	xnbp->xnb_rx_hv_copy = B_FALSE;
	xnbp->xnb_multicast_control = B_FALSE;

	xnbp->xnb_rx_va = vmem_alloc(heap_arena, PAGESIZE, VM_SLEEP);
	ASSERT(xnbp->xnb_rx_va != NULL);

	if (ddi_get_iblock_cookie(dip, 0, &xnbp->xnb_icookie)
	    != DDI_SUCCESS)
		goto failure;

	/* Allocated on demand, when/if we enter xnb_copy_to_peer(). */
	xnbp->xnb_rx_cpop = NULL;
	xnbp->xnb_rx_cpop_count = 0;

	mutex_init(&xnbp->xnb_tx_lock, NULL, MUTEX_DRIVER,
	    xnbp->xnb_icookie);
	mutex_init(&xnbp->xnb_rx_lock, NULL, MUTEX_DRIVER,
	    xnbp->xnb_icookie);
	mutex_init(&xnbp->xnb_state_lock, NULL, MUTEX_DRIVER,
	    xnbp->xnb_icookie);

	/* Set driver private pointer now. */
	ddi_set_driver_private(dip, xnbp);

	(void) sprintf(cachename, "xnb_tx_buf_cache_%d", ddi_get_instance(dip));
	xnbp->xnb_tx_buf_cache = kmem_cache_create(cachename,
	    sizeof (xnb_txbuf_t), 0,
	    xnb_txbuf_constructor, xnb_txbuf_destructor,
	    NULL, xnbp, NULL, 0);
	if (xnbp->xnb_tx_buf_cache == NULL)
		goto failure_0;

	if (!xnb_ks_init(xnbp))
		goto failure_1;

	/*
	 * Receive notification of changes in the state of the
	 * driver in the guest domain.
	 */
	if (xvdi_add_event_handler(dip, XS_OE_STATE, xnb_oe_state_change,
	    NULL) != DDI_SUCCESS)
		goto failure_2;

	/*
	 * Receive notification of hotplug events.
	 */
	if (xvdi_add_event_handler(dip, XS_HP_STATE, xnb_hp_state_change,
	    NULL) != DDI_SUCCESS)
		goto failure_2;

	xsname = xvdi_get_xsname(dip);

	if (xenbus_printf(XBT_NULL, xsname,
	    "feature-multicast-control", "%d",
	    xnb_multicast_control ? 1 : 0) != 0)
		goto failure_3;

	if (xenbus_printf(XBT_NULL, xsname,
	    "feature-rx-copy", "%d", 1) != 0)
		goto failure_3;
	/*
	 * Linux domUs seem to depend on "feature-rx-flip" being 0
	 * in addition to "feature-rx-copy" being 1. It seems strange
	 * to use four possible states to describe a binary decision,
	 * but we might as well play nice.
	 */
	if (xenbus_printf(XBT_NULL, xsname,
	    "feature-rx-flip", "%d", 0) != 0)
		goto failure_3;

	(void) xvdi_switch_state(dip, XBT_NULL, XenbusStateInitWait);
	(void) xvdi_post_event(dip, XEN_HP_ADD);

	return (DDI_SUCCESS);

failure_3:
	xvdi_remove_event_handler(dip, NULL);

failure_2:
	xnb_ks_free(xnbp);

failure_1:
	kmem_cache_destroy(xnbp->xnb_tx_buf_cache);

failure_0:
	mutex_destroy(&xnbp->xnb_state_lock);
	mutex_destroy(&xnbp->xnb_rx_lock);
	mutex_destroy(&xnbp->xnb_tx_lock);

failure:
	vmem_free(heap_arena, xnbp->xnb_rx_va, PAGESIZE);
	kmem_free(xnbp, sizeof (*xnbp));
	return (DDI_FAILURE);
}

void
xnb_detach(dev_info_t *dip)
{
	xnb_t *xnbp = ddi_get_driver_private(dip);

	ASSERT(xnbp != NULL);
	ASSERT(!xnbp->xnb_connected);
	ASSERT(xnbp->xnb_tx_buf_count == 0);

	xnb_disconnect_rings(dip);

	xvdi_remove_event_handler(dip, NULL);

	xnb_ks_free(xnbp);

	kmem_cache_destroy(xnbp->xnb_tx_buf_cache);

	ddi_set_driver_private(dip, NULL);

	mutex_destroy(&xnbp->xnb_state_lock);
	mutex_destroy(&xnbp->xnb_rx_lock);
	mutex_destroy(&xnbp->xnb_tx_lock);

	if (xnbp->xnb_rx_cpop_count > 0)
		kmem_free(xnbp->xnb_rx_cpop, sizeof (xnbp->xnb_rx_cpop[0])
		    * xnbp->xnb_rx_cpop_count);

	ASSERT(xnbp->xnb_rx_va != NULL);
	vmem_free(heap_arena, xnbp->xnb_rx_va, PAGESIZE);

	kmem_free(xnbp, sizeof (*xnbp));
}

/*
 * Allocate a page from the hypervisor to be flipped to the peer.
 *
 * Try to get pages in batches to reduce the overhead of calls into
 * the balloon driver.
 */
static mfn_t
xnb_alloc_page(xnb_t *xnbp)
{
#define	WARNING_RATE_LIMIT 100
#define	BATCH_SIZE 256
	static mfn_t mfns[BATCH_SIZE];	/* common across all instances */
	static int nth = BATCH_SIZE;
	mfn_t mfn;

	mutex_enter(&xnb_alloc_page_lock);
	if (nth == BATCH_SIZE) {
		if (balloon_alloc_pages(BATCH_SIZE, mfns) != BATCH_SIZE) {
			xnbp->xnb_stat_allocation_failure++;
			mutex_exit(&xnb_alloc_page_lock);

			/*
			 * Try for a single page in low memory situations.
			 */
			if (balloon_alloc_pages(1, &mfn) != 1) {
				if ((xnbp->xnb_stat_small_allocation_failure++
				    % WARNING_RATE_LIMIT) == 0)
					cmn_err(CE_WARN, "xnb_alloc_page: "
					    "Cannot allocate memory to "
					    "transfer packets to peer.");
				return (0);
			} else {
				xnbp->xnb_stat_small_allocation_success++;
				return (mfn);
			}
		}

		nth = 0;
		xnbp->xnb_stat_allocation_success++;
	}

	mfn = mfns[nth++];
	mutex_exit(&xnb_alloc_page_lock);

	ASSERT(mfn != 0);

	return (mfn);
#undef BATCH_SIZE
#undef WARNING_RATE_LIMIT
}

/*
 * Free a page back to the hypervisor.
 *
 * This happens only in the error path, so batching is not worth the
 * complication.
 */
static void
xnb_free_page(xnb_t *xnbp, mfn_t mfn)
{
	_NOTE(ARGUNUSED(xnbp));
	int r;
	pfn_t pfn;

	pfn = xen_assign_pfn(mfn);
	pfnzero(pfn, 0, PAGESIZE);
	xen_release_pfn(pfn);

	if ((r = balloon_free_pages(1, &mfn, NULL, NULL)) != 1) {
		cmn_err(CE_WARN, "free_page: cannot decrease memory "
		    "reservation (%d): page kept but unusable (mfn = 0x%lx).",
		    r, mfn);
	}
}

/*
 * Similar to RING_HAS_UNCONSUMED_REQUESTS(&xnbp->rx_ring) but using
 * local variables. Used in both xnb_to_peer() and xnb_copy_to_peer().
 */
#define	XNB_RING_HAS_UNCONSUMED_REQUESTS(_r)		\
	((((_r)->sring->req_prod - loop) <		\
		(RING_SIZE(_r) - (loop - prod))) ?	\
	    ((_r)->sring->req_prod - loop) :		\
	    (RING_SIZE(_r) - (loop - prod)))
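
/*
 * In other words: the number of request slots we may consume is the
 * smaller of the requests the peer has posted but we have not yet
 * consumed and the free response slots.  For example (illustrative
 * values only), with RING_SIZE 256, req_prod 300, loop 290 and prod
 * 100, the macro yields min(300 - 290, 256 - (290 - 100)) = 10.
 */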

/*
 * Pass packets to the peer using page flipping.
 */
mblk_t *
xnb_to_peer(xnb_t *xnbp, mblk_t *mp)
{
	mblk_t *free = mp, *prev = NULL;
	size_t len;
	gnttab_transfer_t *gop;
	boolean_t notify;
	RING_IDX loop, prod, end;

	/*
	 * For each packet the sequence of operations is:
	 *
	 * 1. get a new page from the hypervisor.
	 * 2. get a request slot from the ring.
	 * 3. copy the data into the new page.
	 * 4. transfer the page to the peer.
	 * 5. update the request slot.
	 * 6. kick the peer.
	 * 7. free mp.
	 *
	 * In order to reduce the number of hypercalls, we prepare
	 * several packets for the peer and perform a single hypercall
	 * to transfer them.
	 */

	mutex_enter(&xnbp->xnb_rx_lock);

	/*
	 * If we are not connected to the peer or have not yet
	 * finished hotplug it is too early to pass packets to the
	 * peer.
	 */
	if (!(xnbp->xnb_connected && xnbp->xnb_hotplugged)) {
		mutex_exit(&xnbp->xnb_rx_lock);
		DTRACE_PROBE(flip_rx_too_early);
		xnbp->xnb_stat_rx_too_early++;
		return (mp);
	}

	loop = xnbp->xnb_rx_ring.req_cons;
	prod = xnbp->xnb_rx_ring.rsp_prod_pvt;
	gop = xnbp->xnb_rx_top;

	while ((mp != NULL) &&
	    XNB_RING_HAS_UNCONSUMED_REQUESTS(&xnbp->xnb_rx_ring)) {

		mfn_t mfn;
		pfn_t pfn;
		netif_rx_request_t *rxreq;
		netif_rx_response_t *rxresp;
		char *valoop;
		mblk_t *ml;
		uint16_t cksum_flags;

		/* 1 */
		if ((mfn = xnb_alloc_page(xnbp)) == 0) {
			xnbp->xnb_stat_rx_defer++;
			break;
		}

		/* 2 */
		rxreq = RING_GET_REQUEST(&xnbp->xnb_rx_ring, loop);

#ifdef XNB_DEBUG
		if (!(rxreq->id < NET_RX_RING_SIZE))
			cmn_err(CE_PANIC, "xnb_to_peer: "
			    "id %d out of range in request 0x%p",
			    rxreq->id, (void *)rxreq);
#endif /* XNB_DEBUG */

		/* Assign a pfn and map the new page at the allocated va. */
		pfn = xen_assign_pfn(mfn);
		hat_devload(kas.a_hat, xnbp->xnb_rx_va, PAGESIZE,
		    pfn, PROT_READ | PROT_WRITE, HAT_LOAD);

		/* 3 */
		len = 0;
		valoop = xnbp->xnb_rx_va;
		for (ml = mp; ml != NULL; ml = ml->b_cont) {
			size_t chunk = ml->b_wptr - ml->b_rptr;

			bcopy(ml->b_rptr, valoop, chunk);
			valoop += chunk;
			len += chunk;
		}

		ASSERT(len < PAGESIZE);

		/* Release the pfn. */
		hat_unload(kas.a_hat, xnbp->xnb_rx_va, PAGESIZE,
		    HAT_UNLOAD_UNMAP);
		xen_release_pfn(pfn);

		/* 4 */
		gop->mfn = mfn;
		gop->domid = xnbp->xnb_peer;
		gop->ref = rxreq->gref;

		/* 5.1 */
		rxresp = RING_GET_RESPONSE(&xnbp->xnb_rx_ring, prod);
		rxresp->offset = 0;
		rxresp->flags = 0;

		cksum_flags = xnbp->xnb_flavour->xf_cksum_to_peer(xnbp, mp);
		if (cksum_flags != 0)
			xnbp->xnb_stat_rx_cksum_deferred++;
		rxresp->flags |= cksum_flags;

		rxresp->id = RING_GET_REQUEST(&xnbp->xnb_rx_ring, prod)->id;
		rxresp->status = len;

		loop++;
		prod++;
		gop++;
		prev = mp;
		mp = mp->b_next;
	}

	/*
	 * Did we actually do anything?
	 */
	if (loop == xnbp->xnb_rx_ring.req_cons) {
		mutex_exit(&xnbp->xnb_rx_lock);
		return (mp);
	}

	end = loop;

	/*
	 * Unlink the end of the 'done' list from the remainder.
	 */
	ASSERT(prev != NULL);
	prev->b_next = NULL;

	if (HYPERVISOR_grant_table_op(GNTTABOP_transfer, xnbp->xnb_rx_top,
	    loop - xnbp->xnb_rx_ring.req_cons) != 0) {
		cmn_err(CE_WARN, "xnb_to_peer: transfer operation failed");
	}

	loop = xnbp->xnb_rx_ring.req_cons;
	prod = xnbp->xnb_rx_ring.rsp_prod_pvt;
	gop = xnbp->xnb_rx_top;

	while (loop < end) {
		int16_t status = NETIF_RSP_OKAY;

		if (gop->status != 0) {
			status = NETIF_RSP_ERROR;

			/*
			 * If the status is anything other than
			 * GNTST_bad_page then we don't own the page
			 * any more, so don't try to give it back.
			 */
			if (gop->status != GNTST_bad_page)
				gop->mfn = 0;
		} else {
			/* The page is no longer ours. */
			gop->mfn = 0;
		}

		if (gop->mfn != 0)
			/*
			 * Give back the page, as we won't be using
			 * it.
			 */
			xnb_free_page(xnbp, gop->mfn);
		else
			/*
			 * We gave away a page, update our accounting
			 * now.
			 */
			balloon_drv_subtracted(1);

		/* 5.2 */
		if (status != NETIF_RSP_OKAY) {
			RING_GET_RESPONSE(&xnbp->xnb_rx_ring, prod)->status =
			    status;
		} else {
			xnbp->xnb_stat_ipackets++;
			xnbp->xnb_stat_rbytes += len;
		}

		loop++;
		prod++;
		gop++;
	}

	xnbp->xnb_rx_ring.req_cons = loop;
	xnbp->xnb_rx_ring.rsp_prod_pvt = prod;

	/* 6 */
	/* LINTED: constant in conditional context */
	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&xnbp->xnb_rx_ring, notify);
	if (notify) {
		ec_notify_via_evtchn(xnbp->xnb_evtchn);
		xnbp->xnb_stat_rx_notify_sent++;
	} else {
		xnbp->xnb_stat_rx_notify_deferred++;
	}

	if (mp != NULL)
		xnbp->xnb_stat_rx_defer++;

	mutex_exit(&xnbp->xnb_rx_lock);

	/* Free mblk_t's that we consumed. */
	freemsgchain(free);

	return (mp);
}

/* Helper functions for xnb_copy_to_peer(). */

/*
 * Grow the array of copy operation descriptors.
 */
static boolean_t
grow_cpop_area(xnb_t *xnbp)
{
	size_t count;
	gnttab_copy_t *new;

	ASSERT(MUTEX_HELD(&xnbp->xnb_rx_lock));

	count = xnbp->xnb_rx_cpop_count + CPOP_DEFCNT;

	if ((new = kmem_alloc(sizeof (new[0]) * count, KM_NOSLEEP)) == NULL) {
		xnbp->xnb_stat_other_allocation_failure++;
		return (B_FALSE);
	}

	bcopy(xnbp->xnb_rx_cpop, new,
	    sizeof (xnbp->xnb_rx_cpop[0]) * xnbp->xnb_rx_cpop_count);

	kmem_free(xnbp->xnb_rx_cpop,
	    sizeof (xnbp->xnb_rx_cpop[0]) * xnbp->xnb_rx_cpop_count);

	xnbp->xnb_rx_cpop = new;
	xnbp->xnb_rx_cpop_count = count;

	xnbp->xnb_stat_rx_cpoparea_grown++;

	return (B_TRUE);
}

/*
 * Check whether an address is on a page that's foreign to this domain.
 */
static boolean_t
is_foreign(void *addr)
{
	pfn_t pfn = hat_getpfnum(kas.a_hat, addr);

	return ((pfn & PFN_IS_FOREIGN_MFN) == PFN_IS_FOREIGN_MFN);
}
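
/*
 * (On i86xpv, hat_getpfnum() marks a page backed by another domain's
 * memory by setting PFN_IS_FOREIGN_MFN in the value it returns, which
 * is the bit tested above.)
 */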

/*
 * Insert a newly allocated mblk into a chain, replacing the old one.
 */
static mblk_t *
replace_msg(mblk_t *mp, size_t len, mblk_t *mp_prev, mblk_t *ml_prev)
{
	uint32_t	start, stuff, end, value, flags;
	mblk_t		*new_mp;

	new_mp = copyb(mp);
	if (new_mp == NULL) {
		cmn_err(CE_PANIC, "replace_msg: cannot alloc new message "
		    "for %p, len %lu", (void *) mp, len);
	}

	mac_hcksum_get(mp, &start, &stuff, &end, &value, &flags);
	mac_hcksum_set(new_mp, start, stuff, end, value, flags);

	new_mp->b_next = mp->b_next;
	new_mp->b_prev = mp->b_prev;
	new_mp->b_cont = mp->b_cont;

	/* Make sure we only overwrite pointers to the mblk being replaced. */
	if (mp_prev != NULL && mp_prev->b_next == mp)
		mp_prev->b_next = new_mp;

	if (ml_prev != NULL && ml_prev->b_cont == mp)
		ml_prev->b_cont = new_mp;

	mp->b_next = mp->b_prev = mp->b_cont = NULL;
	freemsg(mp);

	return (new_mp);
}

/*
 * Set all the fields in a gnttab_copy_t.
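 *
 * A single grant copy operation cannot cross a page boundary on
 * either the source or the destination side, so xnb_copy_to_peer()
 * splits each chunk at PAGESIZE before calling this.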
 */
static void
setup_gop(xnb_t *xnbp, gnttab_copy_t *gp, uchar_t *rptr,
    size_t s_off, size_t d_off, size_t len, grant_ref_t d_ref)
{
	ASSERT(xnbp != NULL && gp != NULL);

	gp->source.offset = s_off;
	gp->source.u.gmfn = pfn_to_mfn(hat_getpfnum(kas.a_hat, (caddr_t)rptr));
	gp->source.domid = DOMID_SELF;

	gp->len = (uint16_t)len;
	gp->flags = GNTCOPY_dest_gref;
	gp->status = 0;

	gp->dest.u.ref = d_ref;
	gp->dest.offset = d_off;
	gp->dest.domid = xnbp->xnb_peer;
}

/*
 * Pass packets to the peer using hypervisor copy operations.
 */
mblk_t *
xnb_copy_to_peer(xnb_t *xnbp, mblk_t *mp)
{
	mblk_t		*free = mp, *mp_prev = NULL, *saved_mp = mp;
	mblk_t		*ml, *ml_prev;
	boolean_t	notify;
	RING_IDX	loop, prod;
	int		i;

	/*
	 * If the peer does not pre-post buffers for received packets,
	 * use page flipping to pass packets to it.
	 */
	if (!xnbp->xnb_rx_hv_copy)
		return (xnb_to_peer(xnbp, mp));

	/*
	 * For each packet the sequence of operations is:
	 *
	 *  1. get a request slot from the ring.
	 *  2. set up data for hypercall (see NOTE below)
	 *  3. have the hypervisor copy the data
	 *  4. update the request slot.
	 *  5. kick the peer.
	 *
	 * NOTE ad 2.
	 *  In order to reduce the number of hypercalls, we prepare
	 *  several mblks (mp->b_cont != NULL) for the peer and
	 *  perform a single hypercall to transfer them.  We also have
	 *  to set up a separate copy operation for every page.
	 *
	 * If we have more than one packet (mp->b_next != NULL), we do
	 * this whole dance repeatedly.
	 */

	mutex_enter(&xnbp->xnb_rx_lock);

	if (!(xnbp->xnb_connected && xnbp->xnb_hotplugged)) {
		mutex_exit(&xnbp->xnb_rx_lock);
		DTRACE_PROBE(copy_rx_too_early);
		xnbp->xnb_stat_rx_too_early++;
		return (mp);
	}

	loop = xnbp->xnb_rx_ring.req_cons;
	prod = xnbp->xnb_rx_ring.rsp_prod_pvt;

	while ((mp != NULL) &&
	    XNB_RING_HAS_UNCONSUMED_REQUESTS(&xnbp->xnb_rx_ring)) {
		netif_rx_request_t	*rxreq;
		size_t			d_offset, len;
		int			item_count;
		gnttab_copy_t		*gop_cp;
		netif_rx_response_t	*rxresp;
		uint16_t		cksum_flags;
		int16_t			status = NETIF_RSP_OKAY;

		/* 1 */
		rxreq = RING_GET_REQUEST(&xnbp->xnb_rx_ring, loop);

#ifdef XNB_DEBUG
		if (!(rxreq->id < NET_RX_RING_SIZE))
			cmn_err(CE_PANIC, "xnb_copy_to_peer: "
			    "id %d out of range in request 0x%p",
			    rxreq->id, (void *)rxreq);
#endif /* XNB_DEBUG */

		/* 2 */
		d_offset = 0;
		len = 0;
		item_count = 0;

		gop_cp = xnbp->xnb_rx_cpop;

		/*
		 * We walk the b_cont pointers and set up a
		 * gnttab_copy_t for each sub-page chunk in each data
		 * block.
		 */
		/* 2a */
		for (ml = mp, ml_prev = NULL; ml != NULL; ml = ml->b_cont) {
			size_t	chunk = ml->b_wptr - ml->b_rptr;
			uchar_t	*r_tmp,	*rpt_align;
			size_t	r_offset;

			/*
			 * The hypervisor will not allow us to
			 * reference a foreign page (e.g. one
			 * belonging to another domain) by mfn in the
			 * copy operation. If the data in this mblk is
			 * on such a page we must copy the data into a
			 * local page before initiating the hypervisor
			 * copy operation.
			 */
			if (is_foreign(ml->b_rptr) || is_foreign(ml->b_wptr)) {
				mblk_t *ml_new = replace_msg(ml, chunk,
				    mp_prev, ml_prev);

				/* We can still use old ml, but not *ml! */
				if (free == ml)
					free = ml_new;
				if (mp == ml)
					mp = ml_new;
				ml = ml_new;

				xnbp->xnb_stat_rx_foreign_page++;
			}

			rpt_align = (uchar_t *)ALIGN2PAGE(ml->b_rptr);
			r_offset = (uint16_t)(ml->b_rptr - rpt_align);
			r_tmp = ml->b_rptr;

			if (d_offset + chunk > PAGESIZE)
				cmn_err(CE_PANIC, "xnb_copy_to_peer: mp %p "
				    "(svd: %p), ml %p, rpt_alg. %p, d_offset "
				    "(%lu) + chunk (%lu) > PAGESIZE %d!",
				    (void *)mp, (void *)saved_mp, (void *)ml,
				    (void *)rpt_align,
				    d_offset, chunk, (int)PAGESIZE);

			while (chunk > 0) {
				size_t part_len;

				if (item_count == xnbp->xnb_rx_cpop_count) {
					if (!grow_cpop_area(xnbp))
						goto failure;
					gop_cp = &xnbp->xnb_rx_cpop[item_count];
				}
				/*
				 * If our mblk crosses a page boundary, we need
				 * to do a separate copy for each page.
				 */
				if (r_offset + chunk > PAGESIZE) {
					part_len = PAGESIZE - r_offset;

					DTRACE_PROBE3(mblk_page_crossed,
					    (mblk_t *), ml, int, chunk, int,
					    (int)r_offset);

					xnbp->xnb_stat_rx_pagebndry_crossed++;
				} else {
					part_len = chunk;
				}

				setup_gop(xnbp, gop_cp, r_tmp, r_offset,
				    d_offset, part_len, rxreq->gref);

				chunk -= part_len;

				len += part_len;
				d_offset += part_len;
				r_tmp += part_len;
				/*
				 * The 2nd, 3rd ... last copies will always
				 * start at r_tmp, therefore r_offset is 0.
				 */
				r_offset = 0;
				gop_cp++;
				item_count++;
			}
			ml_prev = ml;

			DTRACE_PROBE4(mblk_loop_end, (mblk_t *), ml, int,
			    chunk, int, len, int, item_count);
		}
		/* 3 */
		if (HYPERVISOR_grant_table_op(GNTTABOP_copy, xnbp->xnb_rx_cpop,
		    item_count) != 0) {
			cmn_err(CE_WARN, "xnb_copy_to_peer: copy op. failed");
			DTRACE_PROBE(HV_granttableopfailed);
		}

		/* 4 */
		rxresp = RING_GET_RESPONSE(&xnbp->xnb_rx_ring, prod);
		rxresp->offset = 0;

		rxresp->flags = 0;

		DTRACE_PROBE4(got_RX_rsp, int, (int)rxresp->id, int,
		    (int)rxresp->offset, int, (int)rxresp->flags, int,
		    (int)rxresp->status);

		cksum_flags = xnbp->xnb_flavour->xf_cksum_to_peer(xnbp, mp);
		if (cksum_flags != 0)
			xnbp->xnb_stat_rx_cksum_deferred++;
		rxresp->flags |= cksum_flags;

		rxresp->id = RING_GET_REQUEST(&xnbp->xnb_rx_ring, prod)->id;
		rxresp->status = len;

		DTRACE_PROBE4(RX_rsp_set, int, (int)rxresp->id, int,
		    (int)rxresp->offset, int, (int)rxresp->flags, int,
		    (int)rxresp->status);

		for (i = 0; i < item_count; i++) {
			if (xnbp->xnb_rx_cpop[i].status != 0) {
				DTRACE_PROBE2(cpop_status_nonnull, int,
				    (int)xnbp->xnb_rx_cpop[i].status,
				    int, i);
				status = NETIF_RSP_ERROR;
			}
		}

		/* 5.2 */
		if (status != NETIF_RSP_OKAY) {
			RING_GET_RESPONSE(&xnbp->xnb_rx_ring, prod)->status =
			    status;
			xnbp->xnb_stat_rx_rsp_notok++;
		} else {
			xnbp->xnb_stat_ipackets++;
			xnbp->xnb_stat_rbytes += len;
		}

		loop++;
		prod++;
		mp_prev = mp;
		mp = mp->b_next;
	}
failure:
	/*
	 * Did we actually do anything?
	 */
	if (loop == xnbp->xnb_rx_ring.req_cons) {
		mutex_exit(&xnbp->xnb_rx_lock);
		return (mp);
	}

	/*
	 * Unlink the end of the 'done' list from the remainder.
	 */
	ASSERT(mp_prev != NULL);
	mp_prev->b_next = NULL;

	xnbp->xnb_rx_ring.req_cons = loop;
	xnbp->xnb_rx_ring.rsp_prod_pvt = prod;

	/* 6 */
	/* LINTED: constant in conditional context */
	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&xnbp->xnb_rx_ring, notify);
	if (notify) {
		ec_notify_via_evtchn(xnbp->xnb_evtchn);
		xnbp->xnb_stat_rx_notify_sent++;
	} else {
		xnbp->xnb_stat_rx_notify_deferred++;
	}

	if (mp != NULL)
		xnbp->xnb_stat_rx_defer++;

	mutex_exit(&xnbp->xnb_rx_lock);

	/* Free mblk_t structs we have consumed. */
	freemsgchain(free);

	return (mp);
}

static void
xnb_tx_notify_peer(xnb_t *xnbp, boolean_t force)
{
	boolean_t notify;

	ASSERT(MUTEX_HELD(&xnbp->xnb_tx_lock));

	/* LINTED: constant in conditional context */
	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&xnbp->xnb_tx_ring, notify);
	if (notify || force) {
		ec_notify_via_evtchn(xnbp->xnb_evtchn);
		xnbp->xnb_stat_tx_notify_sent++;
	} else {
		xnbp->xnb_stat_tx_notify_deferred++;
	}
}

static void
xnb_tx_mark_complete(xnb_t *xnbp, RING_IDX id, int16_t status)
{
	RING_IDX i;
	netif_tx_response_t *txresp;

	ASSERT(MUTEX_HELD(&xnbp->xnb_tx_lock));

	i = xnbp->xnb_tx_ring.rsp_prod_pvt;

	txresp = RING_GET_RESPONSE(&xnbp->xnb_tx_ring, i);
	txresp->id = id;
	txresp->status = status;

	xnbp->xnb_tx_ring.rsp_prod_pvt = i + 1;

	/*
	 * Note that we don't push the change to the peer here - that
	 * is the caller's responsibility.
1254843e1988Sjohnlev 	 */
1255843e1988Sjohnlev }
1256843e1988Sjohnlev 
1257843e1988Sjohnlev static void
125856567907SDavid Edmondson xnb_txbuf_recycle(xnb_txbuf_t *txp)
1259843e1988Sjohnlev {
126056567907SDavid Edmondson 	xnb_t *xnbp = txp->xt_xnbp;
1261843e1988Sjohnlev 
126256567907SDavid Edmondson 	kmem_cache_free(xnbp->xnb_tx_buf_cache, txp);
1263843e1988Sjohnlev 
126456567907SDavid Edmondson 	xnbp->xnb_tx_buf_outstanding--;
1265843e1988Sjohnlev }
1266843e1988Sjohnlev 
126756567907SDavid Edmondson static int
126856567907SDavid Edmondson xnb_txbuf_constructor(void *buf, void *arg, int kmflag)
1269843e1988Sjohnlev {
127056567907SDavid Edmondson 	_NOTE(ARGUNUSED(kmflag));
127156567907SDavid Edmondson 	xnb_txbuf_t *txp = buf;
127256567907SDavid Edmondson 	xnb_t *xnbp = arg;
127356567907SDavid Edmondson 	size_t len;
127456567907SDavid Edmondson 	ddi_dma_cookie_t dma_cookie;
127556567907SDavid Edmondson 	uint_t ncookies;
1276843e1988Sjohnlev 
127756567907SDavid Edmondson 	txp->xt_free_rtn.free_func = xnb_txbuf_recycle;
127856567907SDavid Edmondson 	txp->xt_free_rtn.free_arg = (caddr_t)txp;
1279024c26efSMax zhen 	txp->xt_xnbp = xnbp;
128056567907SDavid Edmondson 	txp->xt_next = NULL;
1281843e1988Sjohnlev 
128256567907SDavid Edmondson 	if (ddi_dma_alloc_handle(xnbp->xnb_devinfo, &buf_dma_attr,
128356567907SDavid Edmondson 	    0, 0, &txp->xt_dma_handle) != DDI_SUCCESS)
128456567907SDavid Edmondson 		goto failure;
1285843e1988Sjohnlev 
128656567907SDavid Edmondson 	if (ddi_dma_mem_alloc(txp->xt_dma_handle, PAGESIZE, &data_accattr,
128756567907SDavid Edmondson 	    DDI_DMA_STREAMING, 0, 0, &txp->xt_buf, &len,
128856567907SDavid Edmondson 	    &txp->xt_acc_handle) != DDI_SUCCESS)
128956567907SDavid Edmondson 		goto failure_1;
1290843e1988Sjohnlev 
129156567907SDavid Edmondson 	if (ddi_dma_addr_bind_handle(txp->xt_dma_handle, NULL, txp->xt_buf,
129256567907SDavid Edmondson 	    len, DDI_DMA_RDWR | DDI_DMA_STREAMING, DDI_DMA_DONTWAIT, 0,
129356567907SDavid Edmondson 	    &dma_cookie, &ncookies) != DDI_DMA_MAPPED)
129556567907SDavid Edmondson 		goto failure_2;
129656567907SDavid Edmondson 	ASSERT(ncookies == 1);
129756567907SDavid Edmondson 
129856567907SDavid Edmondson 	txp->xt_mfn = xnb_btop(dma_cookie.dmac_laddress);
129956567907SDavid Edmondson 	txp->xt_buflen = dma_cookie.dmac_size;
130056567907SDavid Edmondson 
130156567907SDavid Edmondson 	DTRACE_PROBE(txbuf_allocated);
130256567907SDavid Edmondson 
1303*1a5e258fSJosef 'Jeff' Sipek 	atomic_inc_32(&xnbp->xnb_tx_buf_count);
130456567907SDavid Edmondson 	xnbp->xnb_tx_buf_outstanding++;
130556567907SDavid Edmondson 
130656567907SDavid Edmondson 	return (0);
130756567907SDavid Edmondson 
130856567907SDavid Edmondson failure_2:
130956567907SDavid Edmondson 	ddi_dma_mem_free(&txp->xt_acc_handle);
131056567907SDavid Edmondson 
131156567907SDavid Edmondson failure_1:
131256567907SDavid Edmondson 	ddi_dma_free_handle(&txp->xt_dma_handle);
131356567907SDavid Edmondson 
131456567907SDavid Edmondson failure:
131556567907SDavid Edmondson 
131656567907SDavid Edmondson 	return (-1);
1317843e1988Sjohnlev }
1318843e1988Sjohnlev 
1319843e1988Sjohnlev static void
132056567907SDavid Edmondson xnb_txbuf_destructor(void *buf, void *arg)
1321843e1988Sjohnlev {
132256567907SDavid Edmondson 	xnb_txbuf_t *txp = buf;
132356567907SDavid Edmondson 	xnb_t *xnbp = arg;
1324843e1988Sjohnlev 
132556567907SDavid Edmondson 	(void) ddi_dma_unbind_handle(txp->xt_dma_handle);
132656567907SDavid Edmondson 	ddi_dma_mem_free(&txp->xt_acc_handle);
132756567907SDavid Edmondson 	ddi_dma_free_handle(&txp->xt_dma_handle);
1328843e1988Sjohnlev 
1329*1a5e258fSJosef 'Jeff' Sipek 	atomic_dec_32(&xnbp->xnb_tx_buf_count);
1330843e1988Sjohnlev }
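
/*
 * The constructor/destructor pair above manages a page-sized,
 * DMA-bound buffer for each xnb_txbuf_t. A minimal sketch of how a
 * kmem cache would be wired to them follows; it is illustrative
 * only (the cache name and the attach-time context are assumptions,
 * not taken from this file).
 */
#if 0
	xnbp->xnb_tx_buf_cache = kmem_cache_create("xnb_tx_buf",
	    sizeof (xnb_txbuf_t), 0,
	    xnb_txbuf_constructor, xnb_txbuf_destructor,
	    NULL, xnbp, NULL, 0);
#endif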
1331843e1988Sjohnlev 
133256567907SDavid Edmondson /*
133356567907SDavid Edmondson  * Take packets from the peer and deliver them onward.
133456567907SDavid Edmondson  */
1335843e1988Sjohnlev static mblk_t *
1336024c26efSMax zhen xnb_from_peer(xnb_t *xnbp)
1337843e1988Sjohnlev {
1338843e1988Sjohnlev 	RING_IDX start, end, loop;
133956567907SDavid Edmondson 	gnttab_copy_t *cop;
1340024c26efSMax zhen 	xnb_txbuf_t **txpp;
1341843e1988Sjohnlev 	netif_tx_request_t *txreq;
134256567907SDavid Edmondson 	boolean_t work_to_do, need_notify = B_FALSE;
1343843e1988Sjohnlev 	mblk_t *head, *tail;
134456567907SDavid Edmondson 	int n_data_req, i;
1345843e1988Sjohnlev 
134656567907SDavid Edmondson 	ASSERT(MUTEX_HELD(&xnbp->xnb_tx_lock));
1347843e1988Sjohnlev 
1348843e1988Sjohnlev 	head = tail = NULL;
1349843e1988Sjohnlev around:
1350843e1988Sjohnlev 
1351843e1988Sjohnlev 	/* LINTED: constant in conditional context */
1352551bc2a6Smrj 	RING_FINAL_CHECK_FOR_REQUESTS(&xnbp->xnb_tx_ring, work_to_do);
1353843e1988Sjohnlev 	if (!work_to_do) {
1354843e1988Sjohnlev finished:
135556567907SDavid Edmondson 		xnb_tx_notify_peer(xnbp, need_notify);
135656567907SDavid Edmondson 
1357843e1988Sjohnlev 		return (head);
1358843e1988Sjohnlev 	}
1359843e1988Sjohnlev 
1360551bc2a6Smrj 	start = xnbp->xnb_tx_ring.req_cons;
1361551bc2a6Smrj 	end = xnbp->xnb_tx_ring.sring->req_prod;
1362843e1988Sjohnlev 
1363a8e7f927SDavid Edmondson 	if ((end - start) > NET_TX_RING_SIZE) {
1364a8e7f927SDavid Edmondson 		/*
1365a8e7f927SDavid Edmondson 		 * This usually indicates that the frontend driver is
1366a8e7f927SDavid Edmondson 		 * misbehaving, as it's not possible to have more than
1367a8e7f927SDavid Edmondson 		 * NET_TX_RING_SIZE ring elements in play at any one
1368a8e7f927SDavid Edmondson 		 * time.
1369a8e7f927SDavid Edmondson 		 *
1370a8e7f927SDavid Edmondson 		 * We reset the ring pointers to the state declared by
1371a8e7f927SDavid Edmondson 		 * the frontend and try to carry on.
1372a8e7f927SDavid Edmondson 		 */
1373a8e7f927SDavid Edmondson 		cmn_err(CE_WARN, "xnb_from_peer: domain %d tried to give us %u "
1374a8e7f927SDavid Edmondson 		    "items in the ring, resetting and trying to recover.",
1375a8e7f927SDavid Edmondson 		    xnbp->xnb_peer, (end - start));
1376a8e7f927SDavid Edmondson 
1377a8e7f927SDavid Edmondson 		/* LINTED: constant in conditional context */
1378a8e7f927SDavid Edmondson 		BACK_RING_ATTACH(&xnbp->xnb_tx_ring,
1379a8e7f927SDavid Edmondson 		    (netif_tx_sring_t *)xnbp->xnb_tx_ring_addr, PAGESIZE);
1380a8e7f927SDavid Edmondson 
1381a8e7f927SDavid Edmondson 		goto around;
1382a8e7f927SDavid Edmondson 	}
1383a8e7f927SDavid Edmondson 
138456567907SDavid Edmondson 	loop = start;
138556567907SDavid Edmondson 	cop = xnbp->xnb_tx_cop;
138656567907SDavid Edmondson 	txpp = xnbp->xnb_tx_bufp;
138756567907SDavid Edmondson 	n_data_req = 0;
138856567907SDavid Edmondson 
138956567907SDavid Edmondson 	while (loop < end) {
1390fd0939efSDavid Edmondson 		static const uint16_t acceptable_flags =
1391fd0939efSDavid Edmondson 		    NETTXF_csum_blank |
1392fd0939efSDavid Edmondson 		    NETTXF_data_validated |
1393fd0939efSDavid Edmondson 		    NETTXF_extra_info;
1394fd0939efSDavid Edmondson 		uint16_t unexpected_flags;
1395fd0939efSDavid Edmondson 
139656567907SDavid Edmondson 		txreq = RING_GET_REQUEST(&xnbp->xnb_tx_ring, loop);
139756567907SDavid Edmondson 
1398fd0939efSDavid Edmondson 		unexpected_flags = txreq->flags & ~acceptable_flags;
1399fd0939efSDavid Edmondson 		if (unexpected_flags != 0) {
1400fd0939efSDavid Edmondson 			/*
1401fd0939efSDavid Edmondson 			 * The peer used flag bits that we do not
1402fd0939efSDavid Edmondson 			 * recognize.
1403fd0939efSDavid Edmondson 			 */
1404fd0939efSDavid Edmondson 			cmn_err(CE_WARN, "xnb_from_peer: "
1405fd0939efSDavid Edmondson 			    "unexpected flag bits (0x%x) from peer "
1406fd0939efSDavid Edmondson 			    "in transmit request",
1407fd0939efSDavid Edmondson 			    unexpected_flags);
1408fd0939efSDavid Edmondson 			xnbp->xnb_stat_tx_unexpected_flags++;
1409fd0939efSDavid Edmondson 
1410fd0939efSDavid Edmondson 			/* Mark this entry as failed. */
1411fd0939efSDavid Edmondson 			xnb_tx_mark_complete(xnbp, txreq->id, NETIF_RSP_ERROR);
1412fd0939efSDavid Edmondson 			need_notify = B_TRUE;
1413fd0939efSDavid Edmondson 
1414fd0939efSDavid Edmondson 		} else if (txreq->flags & NETTXF_extra_info) {
141556567907SDavid Edmondson 			struct netif_extra_info *erp;
141656567907SDavid Edmondson 			boolean_t status;
141756567907SDavid Edmondson 
141856567907SDavid Edmondson 			loop++; /* Consume another slot in the ring. */
141956567907SDavid Edmondson 			ASSERT(loop <= end);
142056567907SDavid Edmondson 
142156567907SDavid Edmondson 			erp = (struct netif_extra_info *)
142256567907SDavid Edmondson 			    RING_GET_REQUEST(&xnbp->xnb_tx_ring, loop);
142356567907SDavid Edmondson 
142456567907SDavid Edmondson 			switch (erp->type) {
142556567907SDavid Edmondson 			case XEN_NETIF_EXTRA_TYPE_MCAST_ADD:
142656567907SDavid Edmondson 				ASSERT(xnbp->xnb_multicast_control);
142756567907SDavid Edmondson 				status = xnbp->xnb_flavour->xf_mcast_add(xnbp,
142856567907SDavid Edmondson 				    &erp->u.mcast.addr);
142956567907SDavid Edmondson 				break;
143056567907SDavid Edmondson 			case XEN_NETIF_EXTRA_TYPE_MCAST_DEL:
143156567907SDavid Edmondson 				ASSERT(xnbp->xnb_multicast_control);
143256567907SDavid Edmondson 				status = xnbp->xnb_flavour->xf_mcast_del(xnbp,
143356567907SDavid Edmondson 				    &erp->u.mcast.addr);
143456567907SDavid Edmondson 				break;
143556567907SDavid Edmondson 			default:
143656567907SDavid Edmondson 				status = B_FALSE;
143756567907SDavid Edmondson 				cmn_err(CE_WARN, "xnb_from_peer: "
143856567907SDavid Edmondson 				    "unknown extra type %d", erp->type);
143956567907SDavid Edmondson 				break;
144056567907SDavid Edmondson 			}
144156567907SDavid Edmondson 
144256567907SDavid Edmondson 			xnb_tx_mark_complete(xnbp, txreq->id,
144356567907SDavid Edmondson 			    status ? NETIF_RSP_OKAY : NETIF_RSP_ERROR);
144456567907SDavid Edmondson 			need_notify = B_TRUE;
1445fd0939efSDavid Edmondson 
1446fd0939efSDavid Edmondson 		} else if ((txreq->offset > PAGESIZE) ||
1447fd0939efSDavid Edmondson 		    (txreq->offset + txreq->size > PAGESIZE)) {
1448fd0939efSDavid Edmondson 			/*
1449fd0939efSDavid Edmondson 			 * Peer attempted to refer to data beyond the
1450fd0939efSDavid Edmondson 			 * end of the granted page.
1451fd0939efSDavid Edmondson 			 */
1452fd0939efSDavid Edmondson 			cmn_err(CE_WARN, "xnb_from_peer: "
1453fd0939efSDavid Edmondson 			    "attempt to refer beyond the end of granted "
1454fd0939efSDavid Edmondson 			    "page in txreq (offset %d, size %d).",
1455fd0939efSDavid Edmondson 			    txreq->offset, txreq->size);
1456fd0939efSDavid Edmondson 			xnbp->xnb_stat_tx_overflow_page++;
1457fd0939efSDavid Edmondson 
1458fd0939efSDavid Edmondson 			/* Mark this entry as failed. */
1459fd0939efSDavid Edmondson 			xnb_tx_mark_complete(xnbp, txreq->id, NETIF_RSP_ERROR);
1460fd0939efSDavid Edmondson 			need_notify = B_TRUE;
1461fd0939efSDavid Edmondson 
146256567907SDavid Edmondson 		} else {
1463024c26efSMax zhen 			xnb_txbuf_t *txp;
1464843e1988Sjohnlev 
146556567907SDavid Edmondson 			txp = kmem_cache_alloc(xnbp->xnb_tx_buf_cache,
146656567907SDavid Edmondson 			    KM_NOSLEEP);
1467024c26efSMax zhen 			if (txp == NULL)
1468843e1988Sjohnlev 				break;
1469843e1988Sjohnlev 
147056567907SDavid Edmondson 			txp->xt_mblk = desballoc((unsigned char *)txp->xt_buf,
147156567907SDavid Edmondson 			    txp->xt_buflen, 0, &txp->xt_free_rtn);
147256567907SDavid Edmondson 			if (txp->xt_mblk == NULL) {
147356567907SDavid Edmondson 				kmem_cache_free(xnbp->xnb_tx_buf_cache, txp);
147456567907SDavid Edmondson 				break;
1475843e1988Sjohnlev 			}
1476843e1988Sjohnlev 
147756567907SDavid Edmondson 			txp->xt_idx = loop;
147856567907SDavid Edmondson 			txp->xt_id = txreq->id;
1479843e1988Sjohnlev 
148056567907SDavid Edmondson 			cop->source.u.ref = txreq->gref;
148156567907SDavid Edmondson 			cop->source.domid = xnbp->xnb_peer;
148256567907SDavid Edmondson 			cop->source.offset = txreq->offset;
1483843e1988Sjohnlev 
148456567907SDavid Edmondson 			cop->dest.u.gmfn = txp->xt_mfn;
148556567907SDavid Edmondson 			cop->dest.domid = DOMID_SELF;
148656567907SDavid Edmondson 			cop->dest.offset = 0;
1487843e1988Sjohnlev 
148856567907SDavid Edmondson 			cop->len = txreq->size;
148956567907SDavid Edmondson 			cop->flags = GNTCOPY_source_gref;
149056567907SDavid Edmondson 			cop->status = 0;
1491843e1988Sjohnlev 
149256567907SDavid Edmondson 			*txpp = txp;
1493843e1988Sjohnlev 
149456567907SDavid Edmondson 			txpp++;
149556567907SDavid Edmondson 			cop++;
149656567907SDavid Edmondson 			n_data_req++;
149756567907SDavid Edmondson 
149856567907SDavid Edmondson 			ASSERT(n_data_req <= NET_TX_RING_SIZE);
149956567907SDavid Edmondson 		}
1500843e1988Sjohnlev 
1501843e1988Sjohnlev 		loop++;
150256567907SDavid Edmondson 	}
150356567907SDavid Edmondson 
150456567907SDavid Edmondson 	xnbp->xnb_tx_ring.req_cons = loop;
150556567907SDavid Edmondson 
150656567907SDavid Edmondson 	if (n_data_req == 0)
150756567907SDavid Edmondson 		goto around;
150856567907SDavid Edmondson 
150956567907SDavid Edmondson 	if (HYPERVISOR_grant_table_op(GNTTABOP_copy,
151056567907SDavid Edmondson 	    xnbp->xnb_tx_cop, n_data_req) != 0) {
151156567907SDavid Edmondson 
151256567907SDavid Edmondson 		cmn_err(CE_WARN, "xnb_from_peer: copy operation failed");
151356567907SDavid Edmondson 
151456567907SDavid Edmondson 		txpp = xnbp->xnb_tx_bufp;
151556567907SDavid Edmondson 		i = n_data_req;
151656567907SDavid Edmondson 		while (i > 0) {
151756567907SDavid Edmondson 			kmem_cache_free(xnbp->xnb_tx_buf_cache, *txpp);
1518024c26efSMax zhen 			txpp++;
151956567907SDavid Edmondson 			i--;
1520843e1988Sjohnlev 		}
1521843e1988Sjohnlev 
1522843e1988Sjohnlev 		goto finished;
1523843e1988Sjohnlev 	}
1524843e1988Sjohnlev 
152556567907SDavid Edmondson 	txpp = xnbp->xnb_tx_bufp;
152656567907SDavid Edmondson 	cop = xnbp->xnb_tx_cop;
152756567907SDavid Edmondson 	i = n_data_req;
152856567907SDavid Edmondson 
152956567907SDavid Edmondson 	while (i > 0) {
1530024c26efSMax zhen 		xnb_txbuf_t *txp = *txpp;
1531843e1988Sjohnlev 
153256567907SDavid Edmondson 		txreq = RING_GET_REQUEST(&xnbp->xnb_tx_ring, txp->xt_idx);
153356567907SDavid Edmondson 
153456567907SDavid Edmondson 		if (cop->status != 0) {
153556567907SDavid Edmondson #ifdef XNB_DEBUG
1536024c26efSMax zhen 			cmn_err(CE_WARN, "xnb_from_peer: "
153756567907SDavid Edmondson 			    "txpp 0x%p failed (%d)",
153856567907SDavid Edmondson 			    (void *)*txpp, cop->status);
153956567907SDavid Edmondson #endif /* XNB_DEBUG */
1540fd0939efSDavid Edmondson 			xnb_tx_mark_complete(xnbp, txp->xt_id, NETIF_RSP_ERROR);
154156567907SDavid Edmondson 			freemsg(txp->xt_mblk);
1542843e1988Sjohnlev 		} else {
154356567907SDavid Edmondson 			mblk_t *mp;
154456567907SDavid Edmondson 
154556567907SDavid Edmondson 			mp = txp->xt_mblk;
154656567907SDavid Edmondson 			mp->b_rptr = mp->b_wptr = (unsigned char *)txp->xt_buf;
1547843e1988Sjohnlev 			mp->b_wptr += txreq->size;
154856567907SDavid Edmondson 			mp->b_next = NULL;
1549843e1988Sjohnlev 
1550843e1988Sjohnlev 			/*
155156567907SDavid Edmondson 			 * If there are checksum flags, process them
155256567907SDavid Edmondson 			 * appropriately.
1553843e1988Sjohnlev 			 */
155456567907SDavid Edmondson 			if ((txreq->flags &
1555843e1988Sjohnlev 			    (NETTXF_csum_blank | NETTXF_data_validated))
155656567907SDavid Edmondson 			    != 0) {
1557551bc2a6Smrj 				mp = xnbp->xnb_flavour->xf_cksum_from_peer(xnbp,
1558843e1988Sjohnlev 				    mp, txreq->flags);
1559024c26efSMax zhen 				xnbp->xnb_stat_tx_cksum_no_need++;
156056567907SDavid Edmondson 
156156567907SDavid Edmondson 				txp->xt_mblk = mp;
1562843e1988Sjohnlev 			}
1563843e1988Sjohnlev 
1564843e1988Sjohnlev 			if (head == NULL) {
1565843e1988Sjohnlev 				ASSERT(tail == NULL);
1566843e1988Sjohnlev 				head = mp;
1567843e1988Sjohnlev 			} else {
1568843e1988Sjohnlev 				ASSERT(tail != NULL);
1569843e1988Sjohnlev 				tail->b_next = mp;
1570843e1988Sjohnlev 			}
1571843e1988Sjohnlev 			tail = mp;
157256567907SDavid Edmondson 
157356567907SDavid Edmondson 			xnbp->xnb_stat_opackets++;
157456567907SDavid Edmondson 			xnbp->xnb_stat_obytes += txreq->size;
157556567907SDavid Edmondson 
1576fd0939efSDavid Edmondson 			xnb_tx_mark_complete(xnbp, txp->xt_id, NETIF_RSP_OKAY);
1577843e1988Sjohnlev 		}
1578843e1988Sjohnlev 
157956567907SDavid Edmondson 		txpp++;
158056567907SDavid Edmondson 		cop++;
158156567907SDavid Edmondson 		i--;
158256567907SDavid Edmondson 	}
1583843e1988Sjohnlev 
1584843e1988Sjohnlev 	goto around;
1585843e1988Sjohnlev 	/* NOTREACHED */
1586843e1988Sjohnlev }
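
/*
 * Summary of xnb_from_peer(): requests are consumed from the tx
 * ring in batches; data requests are staged into a gnttab_copy_t
 * array and satisfied with a single GNTTABOP_copy hypercall, after
 * which each copy's status is checked, successful buffers are
 * chained as mblks for delivery and every ring entry is completed
 * via xnb_tx_mark_complete(). The loop repeats ('goto around')
 * until RING_FINAL_CHECK_FOR_REQUESTS() reports no more work, at
 * which point the peer is notified if required.
 */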
1587843e1988Sjohnlev 
1588843e1988Sjohnlev static uint_t
1589843e1988Sjohnlev xnb_intr(caddr_t arg)
1590843e1988Sjohnlev {
1591843e1988Sjohnlev 	xnb_t *xnbp = (xnb_t *)arg;
1592843e1988Sjohnlev 	mblk_t *mp;
1593843e1988Sjohnlev 
1594551bc2a6Smrj 	xnbp->xnb_stat_intr++;
1595843e1988Sjohnlev 
1596024c26efSMax zhen 	mutex_enter(&xnbp->xnb_tx_lock);
1597843e1988Sjohnlev 
1598551bc2a6Smrj 	ASSERT(xnbp->xnb_connected);
1599843e1988Sjohnlev 
1600024c26efSMax zhen 	mp = xnb_from_peer(xnbp);
1601843e1988Sjohnlev 
1602024c26efSMax zhen 	mutex_exit(&xnbp->xnb_tx_lock);
1603843e1988Sjohnlev 
1604551bc2a6Smrj 	if (!xnbp->xnb_hotplugged) {
1605024c26efSMax zhen 		xnbp->xnb_stat_tx_too_early++;
1606843e1988Sjohnlev 		goto fail;
1607843e1988Sjohnlev 	}
1608843e1988Sjohnlev 	if (mp == NULL) {
1609551bc2a6Smrj 		xnbp->xnb_stat_spurious_intr++;
1610843e1988Sjohnlev 		goto fail;
1611843e1988Sjohnlev 	}
1612843e1988Sjohnlev 
1613024c26efSMax zhen 	xnbp->xnb_flavour->xf_from_peer(xnbp, mp);
1614843e1988Sjohnlev 
1615843e1988Sjohnlev 	return (DDI_INTR_CLAIMED);
1616843e1988Sjohnlev 
1617843e1988Sjohnlev fail:
1618843e1988Sjohnlev 	freemsgchain(mp);
1619843e1988Sjohnlev 	return (DDI_INTR_CLAIMED);
1620843e1988Sjohnlev }
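
/*
 * Note: the interrupt is always claimed. On the failure paths any
 * packets already pulled from the ring are freed; their ring
 * responses were posted (and the peer possibly notified) inside
 * xnb_from_peer(), so there is nothing further to unwind.
 */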
1621843e1988Sjohnlev 
162256567907SDavid Edmondson /*
162356567907SDavid Edmondson  * Read our configuration (currently just the MAC address) from xenstore.
162456567907SDavid Edmondson  */
162556567907SDavid Edmondson boolean_t
162656567907SDavid Edmondson xnb_read_xs_config(xnb_t *xnbp)
162756567907SDavid Edmondson {
162856567907SDavid Edmondson 	char *xsname;
162956567907SDavid Edmondson 	char mac[ETHERADDRL * 3];
163056567907SDavid Edmondson 
163156567907SDavid Edmondson 	xsname = xvdi_get_xsname(xnbp->xnb_devinfo);
163256567907SDavid Edmondson 
163356567907SDavid Edmondson 	if (xenbus_scanf(XBT_NULL, xsname,
163456567907SDavid Edmondson 	    "mac", "%s", mac) != 0) {
163556567907SDavid Edmondson 		cmn_err(CE_WARN, "xnb_attach: "
163656567907SDavid Edmondson 		    "cannot read mac address from %s",
163756567907SDavid Edmondson 		    xsname);
163856567907SDavid Edmondson 		return (B_FALSE);
163956567907SDavid Edmondson 	}
164056567907SDavid Edmondson 
164156567907SDavid Edmondson 	if (ether_aton(mac, xnbp->xnb_mac_addr) != ETHERADDRL) {
164256567907SDavid Edmondson 		cmn_err(CE_WARN,
164356567907SDavid Edmondson 		    "xnb_attach: cannot parse mac address %s",
164456567907SDavid Edmondson 		    mac);
164556567907SDavid Edmondson 		return (B_FALSE);
164656567907SDavid Edmondson 	}
164756567907SDavid Edmondson 
164856567907SDavid Edmondson 	return (B_TRUE);
164956567907SDavid Edmondson }
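
/*
 * For reference, the xenstore node read above is expected to look
 * like the following (the address is illustrative; 00:16:3e is the
 * Xen OUI):
 *
 *	mac = "00:16:3e:xx:xx:xx"
 */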
165056567907SDavid Edmondson 
165156567907SDavid Edmondson /*
165256567907SDavid Edmondson  * Read the configuration of the peer from xenstore.
165356567907SDavid Edmondson  */
165456567907SDavid Edmondson boolean_t
165556567907SDavid Edmondson xnb_read_oe_config(xnb_t *xnbp)
165656567907SDavid Edmondson {
165756567907SDavid Edmondson 	char *oename;
165856567907SDavid Edmondson 	int i;
165956567907SDavid Edmondson 
166056567907SDavid Edmondson 	oename = xvdi_get_oename(xnbp->xnb_devinfo);
166156567907SDavid Edmondson 
166256567907SDavid Edmondson 	if (xenbus_gather(XBT_NULL, oename,
166356567907SDavid Edmondson 	    "event-channel", "%u", &xnbp->xnb_fe_evtchn,
166456567907SDavid Edmondson 	    "tx-ring-ref", "%lu", &xnbp->xnb_tx_ring_ref,
166556567907SDavid Edmondson 	    "rx-ring-ref", "%lu", &xnbp->xnb_rx_ring_ref,
166656567907SDavid Edmondson 	    NULL) != 0) {
166756567907SDavid Edmondson 		cmn_err(CE_WARN, "xnb_read_oe_config: "
166856567907SDavid Edmondson 		    "cannot read other-end details from %s",
166956567907SDavid Edmondson 		    oename);
167056567907SDavid Edmondson 		return (B_FALSE);
167156567907SDavid Edmondson 	}
167256567907SDavid Edmondson 
167356567907SDavid Edmondson 	/*
167456567907SDavid Edmondson 	 * Check whether our peer requests receive side hypervisor
167556567907SDavid Edmondson 	 * copy.
167656567907SDavid Edmondson 	 */
167756567907SDavid Edmondson 	if (xenbus_scanf(XBT_NULL, oename,
167856567907SDavid Edmondson 	    "request-rx-copy", "%d", &i) != 0)
167956567907SDavid Edmondson 		i = 0;
168056567907SDavid Edmondson 	if (i != 0)
168156567907SDavid Edmondson 		xnbp->xnb_rx_hv_copy = B_TRUE;
168256567907SDavid Edmondson 
168356567907SDavid Edmondson 	/*
168456567907SDavid Edmondson 	 * Check whether our peer requests multicast control.
168556567907SDavid Edmondson 	 */
168656567907SDavid Edmondson 	if (xenbus_scanf(XBT_NULL, oename,
168756567907SDavid Edmondson 	    "request-multicast-control", "%d", &i) != 0)
168856567907SDavid Edmondson 		i = 0;
168956567907SDavid Edmondson 	if (i != 0)
169056567907SDavid Edmondson 		xnbp->xnb_multicast_control = B_TRUE;
169156567907SDavid Edmondson 
169256567907SDavid Edmondson 	/*
169356567907SDavid Edmondson 	 * The Linux backend driver here checks to see if the peer has
169456567907SDavid Edmondson 	 * set 'feature-no-csum-offload'. This is used to indicate
169556567907SDavid Edmondson 	 * that the guest cannot handle receiving packets without a
169656567907SDavid Edmondson 	 * valid checksum. We don't check here, because packets passed
169756567907SDavid Edmondson 	 * to the peer _always_ have a valid checksum.
169856567907SDavid Edmondson 	 *
169956567907SDavid Edmondson 	 * There are three cases:
170056567907SDavid Edmondson 	 *
170156567907SDavid Edmondson 	 * - the NIC is dedicated: packets from the wire should always
170256567907SDavid Edmondson 	 *   have a valid checksum. If the hardware validates the
170356567907SDavid Edmondson 	 *   checksum then the relevant bit will be set in the packet
170456567907SDavid Edmondson 	 *   attributes and we will inform the peer. It can choose to
170556567907SDavid Edmondson 	 *   ignore the hardware verification.
170656567907SDavid Edmondson 	 *
170756567907SDavid Edmondson 	 * - the NIC is shared (VNIC) and a packet originates from the
170856567907SDavid Edmondson 	 *   wire: this is the same as the case above - the packets
170956567907SDavid Edmondson 	 *   will have a valid checksum.
171056567907SDavid Edmondson 	 *
171156567907SDavid Edmondson 	 * - the NIC is shared (VNIC) and a packet originates from the
171256567907SDavid Edmondson 	 *   host: the MAC layer ensures that all such packets have a
171356567907SDavid Edmondson 	 *   valid checksum by calculating one if the stack did not.
171456567907SDavid Edmondson 	 */
171556567907SDavid Edmondson 
171656567907SDavid Edmondson 	return (B_TRUE);
171756567907SDavid Edmondson }
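
/*
 * For reference, the other-end (frontend) xenstore nodes consumed
 * above look like this (values are illustrative):
 *
 *	event-channel = "N"
 *	tx-ring-ref = "R1"
 *	rx-ring-ref = "R2"
 *	request-rx-copy = "1"
 *	request-multicast-control = "1"
 *
 * The last two are optional; if absent they are treated as 0.
 */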
171856567907SDavid Edmondson 
171956567907SDavid Edmondson void
172056567907SDavid Edmondson xnb_start_connect(xnb_t *xnbp)
172156567907SDavid Edmondson {
172256567907SDavid Edmondson 	dev_info_t  *dip = xnbp->xnb_devinfo;
172356567907SDavid Edmondson 
172456567907SDavid Edmondson 	if (!xnb_connect_rings(dip)) {
172556567907SDavid Edmondson 		cmn_err(CE_WARN, "xnb_start_connect: "
172656567907SDavid Edmondson 		    "cannot connect rings");
172756567907SDavid Edmondson 		goto failed;
172856567907SDavid Edmondson 	}
172956567907SDavid Edmondson 
173056567907SDavid Edmondson 	if (!xnbp->xnb_flavour->xf_start_connect(xnbp)) {
173156567907SDavid Edmondson 		cmn_err(CE_WARN, "xnb_start_connect: "
173256567907SDavid Edmondson 		    "flavour failed to connect");
173356567907SDavid Edmondson 		goto failed;
173456567907SDavid Edmondson 	}
173556567907SDavid Edmondson 
173656567907SDavid Edmondson 	(void) xvdi_switch_state(dip, XBT_NULL, XenbusStateConnected);
173756567907SDavid Edmondson 	return;
173856567907SDavid Edmondson 
173956567907SDavid Edmondson failed:
174056567907SDavid Edmondson 	xnbp->xnb_flavour->xf_peer_disconnected(xnbp);
174156567907SDavid Edmondson 	xnb_disconnect_rings(dip);
174256567907SDavid Edmondson 	(void) xvdi_switch_state(dip, XBT_NULL,
174356567907SDavid Edmondson 	    XenbusStateClosed);
174456567907SDavid Edmondson 	(void) xvdi_post_event(dip, XEN_HP_REMOVE);
174556567907SDavid Edmondson }
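
/*
 * xnb_start_connect() is the rendezvous point for two asynchronous
 * preconditions tracked under xnb_state_lock: the frontend reaching
 * XenbusStateConnected (xnb_fe_status, set in xnb_oe_state_change())
 * and the hotplug script completing (xnb_be_status, set in
 * xnb_hp_state_change()). Whichever event arrives second performs
 * the connect.
 */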
174656567907SDavid Edmondson 
1747843e1988Sjohnlev static boolean_t
1748843e1988Sjohnlev xnb_connect_rings(dev_info_t *dip)
1749843e1988Sjohnlev {
1750843e1988Sjohnlev 	xnb_t *xnbp = ddi_get_driver_private(dip);
1751843e1988Sjohnlev 	struct gnttab_map_grant_ref map_op;
1752843e1988Sjohnlev 
1753843e1988Sjohnlev 	/*
1754843e1988Sjohnlev 	 * Cannot attempt to connect the rings if already connected.
1755843e1988Sjohnlev 	 */
1756551bc2a6Smrj 	ASSERT(!xnbp->xnb_connected);
1757843e1988Sjohnlev 
1758843e1988Sjohnlev 	/*
1759843e1988Sjohnlev 	 * 1. allocate a vaddr for the tx page, one for the rx page.
1760843e1988Sjohnlev 	 * 2. call GNTTABOP_map_grant_ref to map the relevant pages
1761843e1988Sjohnlev 	 *    into the allocated vaddr (one for tx, one for rx).
1762843e1988Sjohnlev 	 * 3. call EVTCHNOP_bind_interdomain to have the event channel
1763843e1988Sjohnlev 	 *    bound to this domain.
1764843e1988Sjohnlev 	 * 4. associate the event channel with an interrupt.
176556567907SDavid Edmondson 	 * 5. enable the interrupt.
1766843e1988Sjohnlev 	 */
1767843e1988Sjohnlev 
1768843e1988Sjohnlev 	/* 1.tx */
1769551bc2a6Smrj 	xnbp->xnb_tx_ring_addr = vmem_xalloc(heap_arena, PAGESIZE, PAGESIZE,
1770843e1988Sjohnlev 	    0, 0, 0, 0, VM_SLEEP);
1771551bc2a6Smrj 	ASSERT(xnbp->xnb_tx_ring_addr != NULL);
1772843e1988Sjohnlev 
1773843e1988Sjohnlev 	/* 2.tx */
1774551bc2a6Smrj 	map_op.host_addr = (uint64_t)((long)xnbp->xnb_tx_ring_addr);
1775843e1988Sjohnlev 	map_op.flags = GNTMAP_host_map;
1776551bc2a6Smrj 	map_op.ref = xnbp->xnb_tx_ring_ref;
1777551bc2a6Smrj 	map_op.dom = xnbp->xnb_peer;
17787eea693dSMark Johnson 	hat_prepare_mapping(kas.a_hat, xnbp->xnb_tx_ring_addr, NULL);
17797eea693dSMark Johnson 	if (xen_map_gref(GNTTABOP_map_grant_ref, &map_op, 1, B_FALSE) != 0 ||
17807eea693dSMark Johnson 	    map_op.status != 0) {
1781843e1988Sjohnlev 		cmn_err(CE_WARN, "xnb_connect_rings: cannot map tx-ring page.");
1782843e1988Sjohnlev 		goto fail;
1783843e1988Sjohnlev 	}
1784551bc2a6Smrj 	xnbp->xnb_tx_ring_handle = map_op.handle;
1785843e1988Sjohnlev 
1786843e1988Sjohnlev 	/* LINTED: constant in conditional context */
1787551bc2a6Smrj 	BACK_RING_INIT(&xnbp->xnb_tx_ring,
1788551bc2a6Smrj 	    (netif_tx_sring_t *)xnbp->xnb_tx_ring_addr, PAGESIZE);
1789843e1988Sjohnlev 
1790843e1988Sjohnlev 	/* 1.rx */
1791551bc2a6Smrj 	xnbp->xnb_rx_ring_addr = vmem_xalloc(heap_arena, PAGESIZE, PAGESIZE,
1792843e1988Sjohnlev 	    0, 0, 0, 0, VM_SLEEP);
1793551bc2a6Smrj 	ASSERT(xnbp->xnb_rx_ring_addr != NULL);
1794843e1988Sjohnlev 
1795843e1988Sjohnlev 	/* 2.rx */
1796551bc2a6Smrj 	map_op.host_addr = (uint64_t)((long)xnbp->xnb_rx_ring_addr);
1797843e1988Sjohnlev 	map_op.flags = GNTMAP_host_map;
1798551bc2a6Smrj 	map_op.ref = xnbp->xnb_rx_ring_ref;
1799551bc2a6Smrj 	map_op.dom = xnbp->xnb_peer;
18007eea693dSMark Johnson 	hat_prepare_mapping(kas.a_hat, xnbp->xnb_rx_ring_addr, NULL);
18017eea693dSMark Johnson 	if (xen_map_gref(GNTTABOP_map_grant_ref, &map_op, 1, B_FALSE) != 0 ||
18027eea693dSMark Johnson 	    map_op.status != 0) {
1803843e1988Sjohnlev 		cmn_err(CE_WARN, "xnb_connect_rings: cannot map rx-ring page.");
1804843e1988Sjohnlev 		goto fail;
1805843e1988Sjohnlev 	}
1806551bc2a6Smrj 	xnbp->xnb_rx_ring_handle = map_op.handle;
1807843e1988Sjohnlev 
1808843e1988Sjohnlev 	/* LINTED: constant in conditional context */
1809551bc2a6Smrj 	BACK_RING_INIT(&xnbp->xnb_rx_ring,
1810551bc2a6Smrj 	    (netif_rx_sring_t *)xnbp->xnb_rx_ring_addr, PAGESIZE);
1811843e1988Sjohnlev 
1812843e1988Sjohnlev 	/* 3 */
181356567907SDavid Edmondson 	if (xvdi_bind_evtchn(dip, xnbp->xnb_fe_evtchn) != DDI_SUCCESS) {
1814843e1988Sjohnlev 		cmn_err(CE_WARN, "xnb_connect_rings: "
1815551bc2a6Smrj 		    "cannot bind event channel %d", xnbp->xnb_fe_evtchn);
1816551bc2a6Smrj 		xnbp->xnb_evtchn = INVALID_EVTCHN;
1817843e1988Sjohnlev 		goto fail;
1818843e1988Sjohnlev 	}
1819551bc2a6Smrj 	xnbp->xnb_evtchn = xvdi_get_evtchn(dip);
1820843e1988Sjohnlev 
1821843e1988Sjohnlev 	/*
1822843e1988Sjohnlev 	 * It would be good to set the state to XenbusStateConnected
1823843e1988Sjohnlev 	 * here as well, but then what if ddi_add_intr() fails?
1824843e1988Sjohnlev 	 * Changing the state in the store will be noticed by the peer
1825843e1988Sjohnlev 	 * and cannot be "taken back".
1826843e1988Sjohnlev 	 */
1827551bc2a6Smrj 	mutex_enter(&xnbp->xnb_tx_lock);
1828551bc2a6Smrj 	mutex_enter(&xnbp->xnb_rx_lock);
1829843e1988Sjohnlev 
1830551bc2a6Smrj 	xnbp->xnb_connected = B_TRUE;
1831843e1988Sjohnlev 
1832551bc2a6Smrj 	mutex_exit(&xnbp->xnb_rx_lock);
1833551bc2a6Smrj 	mutex_exit(&xnbp->xnb_tx_lock);
1834843e1988Sjohnlev 
183556567907SDavid Edmondson 	/* 4, 5 */
1836843e1988Sjohnlev 	if (ddi_add_intr(dip, 0, NULL, NULL, xnb_intr, (caddr_t)xnbp)
1837843e1988Sjohnlev 	    != DDI_SUCCESS) {
1838843e1988Sjohnlev 		cmn_err(CE_WARN, "xnb_connect_rings: cannot add interrupt");
1839843e1988Sjohnlev 		goto fail;
1840843e1988Sjohnlev 	}
1841551bc2a6Smrj 	xnbp->xnb_irq = B_TRUE;
1842843e1988Sjohnlev 
1843843e1988Sjohnlev 	return (B_TRUE);
1844843e1988Sjohnlev 
1845843e1988Sjohnlev fail:
1846551bc2a6Smrj 	mutex_enter(&xnbp->xnb_tx_lock);
1847551bc2a6Smrj 	mutex_enter(&xnbp->xnb_rx_lock);
1848843e1988Sjohnlev 
1849551bc2a6Smrj 	xnbp->xnb_connected = B_FALSE;
185056567907SDavid Edmondson 
1851551bc2a6Smrj 	mutex_exit(&xnbp->xnb_rx_lock);
1852551bc2a6Smrj 	mutex_exit(&xnbp->xnb_tx_lock);
1853843e1988Sjohnlev 
1854843e1988Sjohnlev 	return (B_FALSE);
1855843e1988Sjohnlev }
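
/*
 * Note on the failure path above: state built before the failure
 * (mapped ring pages, a bound event channel) is not unwound here;
 * the caller, xnb_start_connect(), tears it down by calling
 * xnb_disconnect_rings(), which copes with partially constructed
 * state.
 */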
1856843e1988Sjohnlev 
1857843e1988Sjohnlev static void
1858843e1988Sjohnlev xnb_disconnect_rings(dev_info_t *dip)
1859843e1988Sjohnlev {
1860843e1988Sjohnlev 	xnb_t *xnbp = ddi_get_driver_private(dip);
1861843e1988Sjohnlev 
1862551bc2a6Smrj 	if (xnbp->xnb_irq) {
1863843e1988Sjohnlev 		ddi_remove_intr(dip, 0, NULL);
1864551bc2a6Smrj 		xnbp->xnb_irq = B_FALSE;
1865843e1988Sjohnlev 	}
1866843e1988Sjohnlev 
1867551bc2a6Smrj 	if (xnbp->xnb_evtchn != INVALID_EVTCHN) {
1868843e1988Sjohnlev 		xvdi_free_evtchn(dip);
1869551bc2a6Smrj 		xnbp->xnb_evtchn = INVALID_EVTCHN;
1870843e1988Sjohnlev 	}
1871843e1988Sjohnlev 
1872551bc2a6Smrj 	if (xnbp->xnb_rx_ring_handle != INVALID_GRANT_HANDLE) {
1873843e1988Sjohnlev 		struct gnttab_unmap_grant_ref unmap_op;
1874843e1988Sjohnlev 
1875551bc2a6Smrj 		unmap_op.host_addr = (uint64_t)(uintptr_t)
1876551bc2a6Smrj 		    xnbp->xnb_rx_ring_addr;
1877843e1988Sjohnlev 		unmap_op.dev_bus_addr = 0;
1878551bc2a6Smrj 		unmap_op.handle = xnbp->xnb_rx_ring_handle;
1879843e1988Sjohnlev 		if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
1880843e1988Sjohnlev 		    &unmap_op, 1) != 0)
1881843e1988Sjohnlev 			cmn_err(CE_WARN, "xnb_disconnect_rings: "
1882843e1988Sjohnlev 			    "cannot unmap rx-ring page (%d)",
1883843e1988Sjohnlev 			    unmap_op.status);
1884843e1988Sjohnlev 
1885551bc2a6Smrj 		xnbp->xnb_rx_ring_handle = INVALID_GRANT_HANDLE;
1886843e1988Sjohnlev 	}
1887843e1988Sjohnlev 
1888551bc2a6Smrj 	if (xnbp->xnb_rx_ring_addr != NULL) {
1889551bc2a6Smrj 		hat_release_mapping(kas.a_hat, xnbp->xnb_rx_ring_addr);
1890551bc2a6Smrj 		vmem_free(heap_arena, xnbp->xnb_rx_ring_addr, PAGESIZE);
1891551bc2a6Smrj 		xnbp->xnb_rx_ring_addr = NULL;
1892843e1988Sjohnlev 	}
1893843e1988Sjohnlev 
1894551bc2a6Smrj 	if (xnbp->xnb_tx_ring_handle != INVALID_GRANT_HANDLE) {
1895843e1988Sjohnlev 		struct gnttab_unmap_grant_ref unmap_op;
1896843e1988Sjohnlev 
1897551bc2a6Smrj 		unmap_op.host_addr = (uint64_t)(uintptr_t)
1898551bc2a6Smrj 		    xnbp->xnb_tx_ring_addr;
1899843e1988Sjohnlev 		unmap_op.dev_bus_addr = 0;
1900551bc2a6Smrj 		unmap_op.handle = xnbp->xnb_tx_ring_handle;
1901843e1988Sjohnlev 		if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
1902843e1988Sjohnlev 		    &unmap_op, 1) != 0)
1903843e1988Sjohnlev 			cmn_err(CE_WARN, "xnb_disconnect_rings: "
1904843e1988Sjohnlev 			    "cannot unmap tx-ring page (%d)",
1905843e1988Sjohnlev 			    unmap_op.status);
1906843e1988Sjohnlev 
1907551bc2a6Smrj 		xnbp->xnb_tx_ring_handle = INVALID_GRANT_HANDLE;
1908843e1988Sjohnlev 	}
1909843e1988Sjohnlev 
1910551bc2a6Smrj 	if (xnbp->xnb_tx_ring_addr != NULL) {
1911551bc2a6Smrj 		hat_release_mapping(kas.a_hat, xnbp->xnb_tx_ring_addr);
1912551bc2a6Smrj 		vmem_free(heap_arena, xnbp->xnb_tx_ring_addr, PAGESIZE);
1913551bc2a6Smrj 		xnbp->xnb_tx_ring_addr = NULL;
1914843e1988Sjohnlev 	}
1915843e1988Sjohnlev }
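
/*
 * Each teardown step above is guarded (xnb_irq, INVALID_EVTCHN,
 * INVALID_GRANT_HANDLE, NULL ring addresses), so this function is
 * safe to call against a partially connected device and is
 * effectively idempotent.
 */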
1916843e1988Sjohnlev 
1917843e1988Sjohnlev static void
1918843e1988Sjohnlev xnb_oe_state_change(dev_info_t *dip, ddi_eventcookie_t id,
1919843e1988Sjohnlev     void *arg, void *impl_data)
1920843e1988Sjohnlev {
192156567907SDavid Edmondson 	_NOTE(ARGUNUSED(id, arg));
1922843e1988Sjohnlev 	xnb_t *xnbp = ddi_get_driver_private(dip);
1923843e1988Sjohnlev 	XenbusState new_state = *(XenbusState *)impl_data;
1924843e1988Sjohnlev 
1925843e1988Sjohnlev 	ASSERT(xnbp != NULL);
1926843e1988Sjohnlev 
1927843e1988Sjohnlev 	switch (new_state) {
1928843e1988Sjohnlev 	case XenbusStateConnected:
192908cfff84Scz147101 		/* spurious state change */
193008cfff84Scz147101 		if (xnbp->xnb_connected)
193108cfff84Scz147101 			return;
193208cfff84Scz147101 
193356567907SDavid Edmondson 		if (!xnb_read_oe_config(xnbp) ||
193456567907SDavid Edmondson 		    !xnbp->xnb_flavour->xf_peer_connected(xnbp)) {
193556567907SDavid Edmondson 			cmn_err(CE_WARN, "xnb_oe_state_change: "
193656567907SDavid Edmondson 			    "cannot read other-end config or connect to peer");
1937843e1988Sjohnlev 			(void) xvdi_switch_state(dip, XBT_NULL,
1938843e1988Sjohnlev 			    XenbusStateClosed);
1939843e1988Sjohnlev 			(void) xvdi_post_event(dip, XEN_HP_REMOVE);
194056567907SDavid Edmondson 
194156567907SDavid Edmondson 			break;
1942843e1988Sjohnlev 		}
1943843e1988Sjohnlev 
194456567907SDavid Edmondson 
194556567907SDavid Edmondson 		mutex_enter(&xnbp->xnb_state_lock);
194656567907SDavid Edmondson 		xnbp->xnb_fe_status = XNB_STATE_READY;
194756567907SDavid Edmondson 		if (xnbp->xnb_be_status == XNB_STATE_READY)
194856567907SDavid Edmondson 			xnb_start_connect(xnbp);
194956567907SDavid Edmondson 		mutex_exit(&xnbp->xnb_state_lock);
195056567907SDavid Edmondson 
1951843e1988Sjohnlev 		/*
1952843e1988Sjohnlev 		 * Now that we've attempted to connect it's reasonable
1953843e1988Sjohnlev 		 * to allow an attempt to detach.
1954843e1988Sjohnlev 		 */
1955551bc2a6Smrj 		xnbp->xnb_detachable = B_TRUE;
1956843e1988Sjohnlev 
1957843e1988Sjohnlev 		break;
1958843e1988Sjohnlev 
1959843e1988Sjohnlev 	case XenbusStateClosing:
1960843e1988Sjohnlev 		(void) xvdi_switch_state(dip, XBT_NULL, XenbusStateClosing);
1961843e1988Sjohnlev 
1962843e1988Sjohnlev 		break;
1963843e1988Sjohnlev 
1964843e1988Sjohnlev 	case XenbusStateClosed:
1965551bc2a6Smrj 		xnbp->xnb_flavour->xf_peer_disconnected(xnbp);
1966843e1988Sjohnlev 
1967551bc2a6Smrj 		mutex_enter(&xnbp->xnb_tx_lock);
1968551bc2a6Smrj 		mutex_enter(&xnbp->xnb_rx_lock);
1969843e1988Sjohnlev 
1970843e1988Sjohnlev 		xnb_disconnect_rings(dip);
1971551bc2a6Smrj 		xnbp->xnb_connected = B_FALSE;
1972843e1988Sjohnlev 
1973551bc2a6Smrj 		mutex_exit(&xnbp->xnb_rx_lock);
1974551bc2a6Smrj 		mutex_exit(&xnbp->xnb_tx_lock);
1975843e1988Sjohnlev 
1976843e1988Sjohnlev 		(void) xvdi_switch_state(dip, XBT_NULL, XenbusStateClosed);
1977843e1988Sjohnlev 		(void) xvdi_post_event(dip, XEN_HP_REMOVE);
1978843e1988Sjohnlev 		/*
1979843e1988Sjohnlev 		 * In all likelihood this is already set (in the above
1980843e1988Sjohnlev 		 * case), but if the peer never attempted to connect
1981843e1988Sjohnlev 		 * and the domain is destroyed we get here without
1982843e1988Sjohnlev 		 * having been through the case above, so we set it to
1983843e1988Sjohnlev 		 * be sure.
1984843e1988Sjohnlev 		 */
1985551bc2a6Smrj 		xnbp->xnb_detachable = B_TRUE;
1986843e1988Sjohnlev 
1987843e1988Sjohnlev 		break;
1988843e1988Sjohnlev 
1989843e1988Sjohnlev 	default:
1990843e1988Sjohnlev 		break;
1991843e1988Sjohnlev 	}
1992843e1988Sjohnlev }
1993843e1988Sjohnlev 
1994843e1988Sjohnlev static void
1995843e1988Sjohnlev xnb_hp_state_change(dev_info_t *dip, ddi_eventcookie_t id,
1996843e1988Sjohnlev     void *arg, void *impl_data)
1997843e1988Sjohnlev {
199856567907SDavid Edmondson 	_NOTE(ARGUNUSED(id, arg));
1999843e1988Sjohnlev 	xnb_t *xnbp = ddi_get_driver_private(dip);
2000843e1988Sjohnlev 	xendev_hotplug_state_t state = *(xendev_hotplug_state_t *)impl_data;
2001843e1988Sjohnlev 
2002843e1988Sjohnlev 	ASSERT(xnbp != NULL);
2003843e1988Sjohnlev 
2004843e1988Sjohnlev 	switch (state) {
2005843e1988Sjohnlev 	case Connected:
200608cfff84Scz147101 		/* spurious hotplug event */
200708cfff84Scz147101 		if (xnbp->xnb_hotplugged)
200856567907SDavid Edmondson 			break;
200908cfff84Scz147101 
201056567907SDavid Edmondson 		if (!xnb_read_xs_config(xnbp))
201156567907SDavid Edmondson 			break;
201256567907SDavid Edmondson 
201356567907SDavid Edmondson 		if (!xnbp->xnb_flavour->xf_hotplug_connected(xnbp))
201456567907SDavid Edmondson 			break;
2015843e1988Sjohnlev 
2016551bc2a6Smrj 		mutex_enter(&xnbp->xnb_tx_lock);
2017551bc2a6Smrj 		mutex_enter(&xnbp->xnb_rx_lock);
2018843e1988Sjohnlev 
201956567907SDavid Edmondson 		xnbp->xnb_hotplugged = B_TRUE;
2020843e1988Sjohnlev 
2021551bc2a6Smrj 		mutex_exit(&xnbp->xnb_rx_lock);
2022551bc2a6Smrj 		mutex_exit(&xnbp->xnb_tx_lock);
202356567907SDavid Edmondson 
202456567907SDavid Edmondson 		mutex_enter(&xnbp->xnb_state_lock);
202556567907SDavid Edmondson 		xnbp->xnb_be_status = XNB_STATE_READY;
202656567907SDavid Edmondson 		if (xnbp->xnb_fe_status == XNB_STATE_READY)
202756567907SDavid Edmondson 			xnb_start_connect(xnbp);
202856567907SDavid Edmondson 		mutex_exit(&xnbp->xnb_state_lock);
202956567907SDavid Edmondson 
2030843e1988Sjohnlev 		break;
2031843e1988Sjohnlev 
2032843e1988Sjohnlev 	default:
2033843e1988Sjohnlev 		break;
2034843e1988Sjohnlev 	}
2035843e1988Sjohnlev }
2036843e1988Sjohnlev 
2037843e1988Sjohnlev static struct modldrv modldrv = {
2038a859da42SDavid Edmondson 	&mod_miscops, "xnb",
2039843e1988Sjohnlev };
2040843e1988Sjohnlev 
2041843e1988Sjohnlev static struct modlinkage modlinkage = {
2042843e1988Sjohnlev 	MODREV_1, &modldrv, NULL
2043843e1988Sjohnlev };
2044843e1988Sjohnlev 
2045843e1988Sjohnlev int
2046843e1988Sjohnlev _init(void)
2047843e1988Sjohnlev {
2048843e1988Sjohnlev 	int i;
2049843e1988Sjohnlev 
2050843e1988Sjohnlev 	mutex_init(&xnb_alloc_page_lock, NULL, MUTEX_DRIVER, NULL);
2051843e1988Sjohnlev 
2052843e1988Sjohnlev 	i = mod_install(&modlinkage);
205356567907SDavid Edmondson 	if (i != DDI_SUCCESS)
2054843e1988Sjohnlev 		mutex_destroy(&xnb_alloc_page_lock);
205556567907SDavid Edmondson 
2056843e1988Sjohnlev 	return (i);
2057843e1988Sjohnlev }
2058843e1988Sjohnlev 
2059843e1988Sjohnlev int
2060843e1988Sjohnlev _info(struct modinfo *modinfop)
2061843e1988Sjohnlev {
2062843e1988Sjohnlev 	return (mod_info(&modlinkage, modinfop));
2063843e1988Sjohnlev }
2064843e1988Sjohnlev 
2065843e1988Sjohnlev int
2066843e1988Sjohnlev _fini(void)
2067843e1988Sjohnlev {
2068843e1988Sjohnlev 	int i;
2069843e1988Sjohnlev 
2070843e1988Sjohnlev 	i = mod_remove(&modlinkage);
207156567907SDavid Edmondson 	if (i == DDI_SUCCESS)
2072843e1988Sjohnlev 		mutex_destroy(&xnb_alloc_page_lock);
207356567907SDavid Edmondson 
2074843e1988Sjohnlev 	return (i);
2075843e1988Sjohnlev }
2076