/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#include <sys/types.h>
#include <sys/errno.h>
#include <sys/debug.h>
#include <sys/time.h>
#include <sys/sysmacros.h>
#include <sys/systm.h>
#include <sys/user.h>
#include <sys/stropts.h>
#include <sys/stream.h>
#include <sys/strlog.h>
#include <sys/strsubr.h>
#include <sys/cmn_err.h>
#include <sys/cpu.h>
#include <sys/kmem.h>
#include <sys/conf.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/ksynch.h>
#include <sys/stat.h>
#include <sys/kstat.h>
#include <sys/vtrace.h>
#include <sys/strsun.h>
#include <sys/dlpi.h>
#include <sys/ethernet.h>
#include <net/if.h>
#include <sys/varargs.h>
#include <sys/machsystm.h>
#include <sys/modctl.h>
#include <sys/modhash.h>
#include <sys/mac.h>
#include <sys/mac_ether.h>
#include <sys/taskq.h>
#include <sys/note.h>
#include <sys/mach_descrip.h>
#include <sys/mdeg.h>
#include <sys/ldc.h>
#include <sys/vsw_fdb.h>
#include <sys/vsw.h>
#include <sys/vio_mailbox.h>
#include <sys/vnet_mailbox.h>
#include <sys/vnet_common.h>
#include <sys/vio_util.h>
#include <sys/sdt.h>
#include <sys/atomic.h>
#include <sys/callb.h>


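/*
 * Request IDs are generated per share and are used to match DDS request
 * and response messages (see vsw_process_dds_msg()).
 */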
#define	VSW_DDS_NEXT_REQID(vsharep)	(++((vsharep)->vs_req_id))

extern boolean_t vsw_hio_enabled;		/* HybridIO enabled? */
extern int vsw_hio_max_cleanup_retries;
extern int vsw_hio_cleanup_delay;

/* Functions imported from other files */
extern int vsw_send_msg(vsw_ldc_t *, void *, int, boolean_t);
extern int vsw_set_hw(vsw_t *, vsw_port_t *, int);
extern int vsw_unset_hw(vsw_t *, vsw_port_t *, int);
extern void vsw_hio_port_reset(vsw_port_t *portp, boolean_t immediate);

/* Functions exported to other files */
void vsw_hio_init(vsw_t *vswp);
void vsw_hio_cleanup(vsw_t *vswp);
void vsw_hio_start(vsw_t *vswp, vsw_ldc_t *ldcp);
void vsw_hio_stop(vsw_t *vswp, vsw_ldc_t *ldcp);
void vsw_process_dds_msg(vsw_t *vswp, vsw_ldc_t *ldcp, void *msg);
void vsw_hio_start_ports(vsw_t *vswp);
void vsw_hio_stop_port(vsw_port_t *portp);
void vsw_hio_port_update(vsw_port_t *portp, boolean_t hio_enabled);

/* Support functions */
static void vsw_hio_free_all_shares(vsw_t *vswp, boolean_t reboot);
static vsw_share_t *vsw_hio_alloc_share(vsw_t *vswp, vsw_ldc_t *ldcp);
static void vsw_hio_free_share(vsw_share_t *vsharep);
static vsw_share_t *vsw_hio_find_free_share(vsw_t *vswp);
static vsw_share_t *vsw_hio_find_vshare_ldcid(vsw_t *vswp, uint64_t ldc_id);
static vsw_share_t *vsw_hio_find_vshare_port(vsw_t *vswp, vsw_port_t *portp);
static int vsw_send_dds_msg(vsw_ldc_t *ldcp, uint8_t dds_subclass,
    uint64_t cookie, uint64_t macaddr, uint32_t req_id);
static int vsw_send_dds_resp_msg(vsw_ldc_t *ldcp, vio_dds_msg_t *dmsg, int ack);
static int vsw_hio_send_delshare_msg(vsw_share_t *vsharep);
static int vsw_hio_bind_macaddr(vsw_share_t *vsharep);
static void vsw_hio_unbind_macaddr(vsw_share_t *vsharep);
static void vsw_hio_reset_all(vsw_t *vswp);
static boolean_t vsw_hio_reboot_callb(void *arg, int code);
static boolean_t vsw_hio_panic_callb(void *arg, int code);


/*
 * vsw_hio_init -- Initialize the HybridIO-related info.
 *	- Query SHARES and RINGS capability. Both capabilities
 *	  need to be supported by the physical device.
 */
void
vsw_hio_init(vsw_t *vswp)
{
	vsw_hio_t	*hiop = &vswp->vhio;
	int		i;
	int		rv;

	D1(vswp, "%s:enter\n", __func__);
	mutex_enter(&vswp->hw_lock);
	if (vsw_hio_enabled == B_FALSE) {
		mutex_exit(&vswp->hw_lock);
		return;
	}

	vswp->hio_capable = B_FALSE;
	rv = mac_capab_get(vswp->mh, MAC_CAPAB_SHARES, &hiop->vh_scapab);
	if (rv == B_FALSE) {
		D2(vswp, "%s: %s is not HybridIO capable\n", __func__,
		    vswp->physname);
		mutex_exit(&vswp->hw_lock);
		return;
	}
	rv = mac_capab_get(vswp->mh, MAC_CAPAB_RINGS, &hiop->vh_rcapab);
	if (rv == B_FALSE) {
		DWARN(vswp, "%s: %s has no RINGS capability\n", __func__,
		    vswp->physname);
		mutex_exit(&vswp->hw_lock);
		return;
	}
	hiop->vh_num_shares = hiop->vh_scapab.ms_snum;
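	/* Allocate one vsw_share_t per share reported by the SHARES capability */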
	hiop->vh_shares = kmem_zalloc((sizeof (vsw_share_t) *
	    hiop->vh_num_shares), KM_SLEEP);
	for (i = 0; i < hiop->vh_num_shares; i++) {
		hiop->vh_shares[i].vs_state = VSW_SHARE_FREE;
		hiop->vh_shares[i].vs_index = i;
		hiop->vh_shares[i].vs_vswp = vswp;
	}
	vswp->hio_capable = B_TRUE;

	/*
	 * Register to get reboot and panic events so that
	 * we can clean up HybridIO resources gracefully.
	 */
	vswp->hio_reboot_cb_id = callb_add(vsw_hio_reboot_callb,
	    (void *)vswp, CB_CL_MDBOOT, "vsw_hio");

	vswp->hio_panic_cb_id = callb_add(vsw_hio_panic_callb,
	    (void *)vswp, CB_CL_PANIC, "vsw_hio");

	D2(vswp, "%s: %s is HybridIO capable num_shares=%d\n", __func__,
	    vswp->physname, hiop->vh_num_shares);
	D1(vswp, "%s:exit\n", __func__);
	mutex_exit(&vswp->hw_lock);
}

/*
 * vsw_hio_alloc_share -- Allocate and set up a share for a guest domain.
 *	- Allocate a free share.
 *	- Bind the Guest's MAC address.
 */
static vsw_share_t *
vsw_hio_alloc_share(vsw_t *vswp, vsw_ldc_t *ldcp)
{
	vsw_hio_t	*hiop = &vswp->vhio;
	mac_capab_share_t *hcapab = &hiop->vh_scapab;
	vsw_share_t	*vsharep;
	vsw_port_t	*portp = ldcp->ldc_port;
	uint64_t	ldc_id = ldcp->ldc_id;
	uint32_t	rmin, rmax;
	uint64_t	rmap;
	int		rv;

	D1(vswp, "%s:enter\n", __func__);
	vsharep = vsw_hio_find_free_share(vswp);
	if (vsharep == NULL) {
		/* No free shares available */
		return (NULL);
	}
	/*
	 * Allocate a Share - it will come with rings/groups
	 * already assigned to it.
	 */
	rv = hcapab->ms_salloc(hcapab->ms_handle, ldc_id,
	    &vsharep->vs_cookie, &vsharep->vs_shdl);
	if (rv != 0) {
		D2(vswp, "Share allocation failed for ldc=0x%lx rv=%d",
		    ldc_id, rv);
		return (NULL);
	}

	/*
	 * Query the RX group number to bind the port's
	 * MAC address to it.
	 */
	hcapab->ms_squery(vsharep->vs_shdl, MAC_RING_TYPE_RX,
	    &rmin, &rmax, &rmap, &vsharep->vs_gnum);

	/* Cache some useful info */
	vsharep->vs_ldcid = ldcp->ldc_id;
	vsharep->vs_macaddr = vnet_macaddr_strtoul(
	    portp->p_macaddr.ether_addr_octet);
	vsharep->vs_portp = ldcp->ldc_port;

	/* Bind the Guest's MAC address */
	rv = vsw_hio_bind_macaddr(vsharep);
	if (rv != 0) {
		/* Something went wrong; clean up */
		hcapab->ms_sfree(vsharep->vs_shdl);
		return (NULL);
	}

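	/* Mark the share assigned; DDS state (SENT/ACKD) is tracked on top */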
	vsharep->vs_state |= VSW_SHARE_ASSIGNED;

	D1(vswp, "%s:exit\n", __func__);
	return (vsharep);
}

/*
 * vsw_hio_bind_macaddr -- Remove the port's MAC address from the
 *	physdev and bind it to the Share's RX group.
 */
static int
vsw_hio_bind_macaddr(vsw_share_t *vsharep)
{
	vsw_t		*vswp = vsharep->vs_vswp;
	vsw_port_t	*portp = vsharep->vs_portp;
	mac_capab_rings_t *rcapab = &vswp->vhio.vh_rcapab;
	mac_group_info_t *ginfop = &vsharep->vs_rxginfo;
	int		rv;

	/* Get the RX groupinfo */
	rcapab->mr_gget(rcapab->mr_handle, MAC_RING_TYPE_RX,
	    vsharep->vs_gnum, &vsharep->vs_rxginfo, NULL);

	/* Unset the MAC address first */
	if (portp->addr_set != VSW_ADDR_UNSET) {
		(void) vsw_unset_hw(vswp, portp, VSW_VNETPORT);
	}

	/* Bind the MAC address to the RX group */
	rv = ginfop->mrg_addmac(ginfop->mrg_driver,
	    (uint8_t *)&portp->p_macaddr.ether_addr_octet);
	if (rv != 0) {
		/* Restore the address back as it was */
		(void) vsw_set_hw(vswp, portp, VSW_VNETPORT);
		return (rv);
	}
	return (0);
}

/*
 * vsw_hio_unbind_macaddr -- Unbind the port's MAC address and restore
 *	it as it was before.
 */
static void
vsw_hio_unbind_macaddr(vsw_share_t *vsharep)
{
	vsw_t		*vswp = vsharep->vs_vswp;
	vsw_port_t	*portp = vsharep->vs_portp;
	mac_group_info_t *ginfop = &vsharep->vs_rxginfo;

	if (portp == NULL) {
		return;
	}
	/* Unbind the MAC address from the RX group */
	(void) ginfop->mrg_remmac(ginfop->mrg_driver,
	    (uint8_t *)&portp->p_macaddr.ether_addr_octet);

	/* Program the MAC address back */
	(void) vsw_set_hw(vswp, portp, VSW_VNETPORT);
}

/*
 * vsw_hio_find_free_share -- Find a free Share.
 */
static vsw_share_t *
vsw_hio_find_free_share(vsw_t *vswp)
{
	vsw_hio_t *hiop = &vswp->vhio;
	vsw_share_t *vsharep;
	int i;

	D1(vswp, "%s:enter\n", __func__);
	for (i = 0; i < hiop->vh_num_shares; i++) {
		vsharep = &hiop->vh_shares[i];
		if (vsharep->vs_state == VSW_SHARE_FREE) {
			D1(vswp, "%s:Returning free share(%d)\n",
			    __func__, vsharep->vs_index);
			return (vsharep);
		}
	}
	D1(vswp, "%s:no free share\n", __func__);
	return (NULL);
}

/*
 * vsw_hio_find_vshare_ldcid -- Given ldc_id, find the corresponding
 *	share structure.
 */
static vsw_share_t *
vsw_hio_find_vshare_ldcid(vsw_t *vswp, uint64_t ldc_id)
{
	vsw_hio_t *hiop = &vswp->vhio;
	vsw_share_t *vsharep;
	int i;

	D1(vswp, "%s:enter, ldc=0x%lx", __func__, ldc_id);
	for (i = 0; i < hiop->vh_num_shares; i++) {
		vsharep = &hiop->vh_shares[i];
		if (vsharep->vs_state == VSW_SHARE_FREE) {
			continue;
		}
		if (vsharep->vs_ldcid == ldc_id) {
			D1(vswp, "%s:returning share(%d)",
			    __func__, vsharep->vs_index);
			return (vsharep);
		}
	}
	D1(vswp, "%s:returning NULL", __func__);
	return (NULL);
}

/*
 * vsw_hio_find_vshare_port -- Given portp, find the corresponding
 *	share structure.
 */
static vsw_share_t *
vsw_hio_find_vshare_port(vsw_t *vswp, vsw_port_t *portp)
{
	vsw_hio_t *hiop = &vswp->vhio;
	vsw_share_t *vsharep;
	int i;

	D1(vswp, "%s:enter, portp=0x%p", __func__, portp);
	for (i = 0; i < hiop->vh_num_shares; i++) {
		vsharep = &hiop->vh_shares[i];
		if (vsharep->vs_state == VSW_SHARE_FREE) {
			continue;
		}
		if (vsharep->vs_portp == portp) {
			D1(vswp, "%s:returning share(%d)",
			    __func__, vsharep->vs_index);
			return (vsharep);
		}
	}
	D1(vswp, "%s:returning NULL", __func__);
	return (NULL);
}

/*
 * vsw_hio_free_share -- Unbind the MAC address and free the share.
 */
static void
vsw_hio_free_share(vsw_share_t *vsharep)
{
	vsw_t		*vswp = vsharep->vs_vswp;
	vsw_hio_t	*hiop = &vswp->vhio;
	mac_capab_share_t *hcapab = &hiop->vh_scapab;

	D1(vswp, "%s:enter\n", __func__);

	/* First unbind the MAC address and restore it back */
	vsw_hio_unbind_macaddr(vsharep);

	/* Free the share */
	hcapab->ms_sfree(vsharep->vs_shdl);
	vsharep->vs_state = VSW_SHARE_FREE;

	/* DERR is used so this message prints by default */
	DERR(vswp, "Share freed for ldc_id=0x%lx Cookie=0x%lX",
	    vsharep->vs_ldcid, vsharep->vs_cookie);
	D1(vswp, "%s:exit\n", __func__);
}


/*
 * vsw_hio_cleanup -- Clean up HybridIO. It unregisters the callbacks
 *	and frees all shares.
 */
void
vsw_hio_cleanup(vsw_t *vswp)
{
	D1(vswp, "%s:enter\n", __func__);

	/* Unregister reboot and panic callbacks. */
	if (vswp->hio_reboot_cb_id) {
		(void) callb_delete(vswp->hio_reboot_cb_id);
		vswp->hio_reboot_cb_id = 0;
	}
	if (vswp->hio_panic_cb_id) {
		(void) callb_delete(vswp->hio_panic_cb_id);
		vswp->hio_panic_cb_id = 0;
	}
	vsw_hio_free_all_shares(vswp, B_FALSE);
	D1(vswp, "%s:exit\n", __func__);
}

/*
 * vsw_hio_free_all_shares -- A routine to free all shares gracefully.
 *	The following are the steps followed to accomplish this:
 *
 *	- First clear 'hio_capable' to avoid further share allocations.
 *	- If a share is in the accepted (ACKD) state, the guest has its
 *	  HybridIO setup in place, so send a DEL_SHARE message and
 *	  give some time (delay) for the guest to ACK.
 *	- If the Share is in another state, give it some time to transition
 *	  to the ACKD state, then try the above.
 *	- After max retries, reset the ports to forcibly free the shares.
 *	  Give a little delay for the LDC reset code to free the Share.
 */
static void
vsw_hio_free_all_shares(vsw_t *vswp, boolean_t reboot)
{
	vsw_hio_t	*hiop = &vswp->vhio;
	vsw_port_list_t	*plist = &vswp->plist;
	vsw_share_t	*vsharep;
	int		free_shares = 0;
	int		max_retries = vsw_hio_max_cleanup_retries;
	int		i;

	D1(vswp, "%s:enter\n", __func__);

	/*
	 * Acquire plist->lockrw to make the locking a bit easier
	 * and keep the ports in a stable state while we are cleaning up
	 * HybridIO.
	 */
	READ_ENTER(&plist->lockrw);
	mutex_enter(&vswp->hw_lock);
	/*
	 * First clear the hio_capable flag so that no more
	 * HybridIO operations are initiated.
	 */
	vswp->hio_capable = B_FALSE;

	do {
		free_shares = 0;
		for (i = 0; i < hiop->vh_num_shares; i++) {
			vsharep = &hiop->vh_shares[i];
			if (vsharep->vs_state == VSW_SHARE_FREE) {
				free_shares++;
				continue;
			}
			/*
			 * If the share is in DDS_ACKD state, then
			 * send a DEL_SHARE message so that the guest
			 * can release its Hybrid resource.
			 */
			if (vsharep->vs_state & VSW_SHARE_DDS_ACKD) {
				int rv;

				/* send DDS_DEL_SHARE */
				D1(vswp, "%s:sending DEL_SHARE msg for "
				    "share(%d)", __func__, vsharep->vs_index);
				rv = vsw_hio_send_delshare_msg(vsharep);
				if (rv != 0) {
					/*
					 * No alternative, reset the port
					 * to force the release of Hybrid
					 * resources.
					 */
					vsw_hio_port_reset(vsharep->vs_portp,
					    B_FALSE);
				}
			}
			if (max_retries == 1) {
				/*
				 * Last retry: reset the port. In the
				 * reboot case, issue an immediate reset.
				 */
				DWARN(vswp, "%s:All retries failed, "
				    "cause a reset to trigger cleanup for "
				    "share(%d)", __func__, vsharep->vs_index);
				vsw_hio_port_reset(vsharep->vs_portp, reboot);
			}
		}
		if (free_shares == hiop->vh_num_shares) {
			/* Cleanup is complete */
			break;
		}
		/*
		 * Release the lock so that replies to DEL_SHARE
		 * messages can arrive and be processed, that is,
		 * shares get freed.
		 * This delay is also needed for the port reset to
		 * release the Hybrid resource.
		 */
		mutex_exit(&vswp->hw_lock);
		drv_usecwait(vsw_hio_cleanup_delay);
		mutex_enter(&vswp->hw_lock);
		max_retries--;
	} while ((free_shares < hiop->vh_num_shares) && (max_retries > 0));

	/* By now, all shares should be freed */
	if (free_shares != hiop->vh_num_shares) {
		if (reboot == B_FALSE) {
			cmn_err(CE_NOTE, "vsw%d: Not all physical resources "
			    "could be freed", vswp->instance);
		}
	}

	kmem_free(hiop->vh_shares, sizeof (vsw_share_t) * hiop->vh_num_shares);
	hiop->vh_shares = NULL;
	hiop->vh_num_shares = 0;
	mutex_exit(&vswp->hw_lock);
	RW_EXIT(&plist->lockrw);
	D1(vswp, "%s:exit\n", __func__);
}

/*
 * vsw_hio_start_ports -- Start HybridIO for ports that have
 *	already established a connection before HybridIO was initialized.
 */
void
vsw_hio_start_ports(vsw_t *vswp)
{
	vsw_port_list_t	*plist = &vswp->plist;
	vsw_port_t	*portp;
	vsw_share_t	*vsharep;
	boolean_t	reset;

	if (vswp->hio_capable == B_FALSE) {
		return;
	}
	READ_ENTER(&plist->lockrw);
	for (portp = plist->head; portp != NULL; portp = portp->p_next) {
		if ((portp->p_hio_enabled == B_FALSE) ||
		    (portp->p_hio_capable == B_FALSE)) {
			continue;
		}

		reset = B_FALSE;
		mutex_enter(&vswp->hw_lock);
		vsharep = vsw_hio_find_vshare_port(vswp, portp);
		if (vsharep == NULL) {
			reset = B_TRUE;
		}
		mutex_exit(&vswp->hw_lock);

		if (reset == B_TRUE) {
			/* Cause a reset to trigger HybridIO setup */
			vsw_hio_port_reset(portp, B_FALSE);
		}
	}
	RW_EXIT(&plist->lockrw);
}

/*
 * vsw_hio_start -- Start HybridIO for a guest (given LDC).
 */
void
vsw_hio_start(vsw_t *vswp, vsw_ldc_t *ldcp)
{
	vsw_share_t	*vsharep;
	uint32_t	req_id;
	int		rv;

	D1(vswp, "%s:enter ldc=0x%lx", __func__, ldcp->ldc_id);
	mutex_enter(&vswp->hw_lock);
	if (vswp->hio_capable == B_FALSE) {
		mutex_exit(&vswp->hw_lock);
		D2(vswp, "%s:not HIO capable", __func__);
		return;
	}

	/* Verify if a share was already allocated */
	vsharep = vsw_hio_find_vshare_ldcid(vswp, ldcp->ldc_id);
	if (vsharep != NULL) {
		mutex_exit(&vswp->hw_lock);
		D2(vswp, "%s:Share already allocated to ldc=0x%lx",
		    __func__, ldcp->ldc_id);
		return;
	}
	vsharep = vsw_hio_alloc_share(vswp, ldcp);
	if (vsharep == NULL) {
		mutex_exit(&vswp->hw_lock);
		D2(vswp, "%s: no Share available for ldc=0x%lx",
		    __func__, ldcp->ldc_id);
		return;
	}
	req_id = VSW_DDS_NEXT_REQID(vsharep);
	rv = vsw_send_dds_msg(ldcp, DDS_VNET_ADD_SHARE, vsharep->vs_cookie,
	    vsharep->vs_macaddr, req_id);
	if (rv != 0) {
		/*
		 * Failed to send a DDS message, so clean up now.
		 */
		vsw_hio_free_share(vsharep);
		mutex_exit(&vswp->hw_lock);
		return;
	}
	vsharep->vs_state &= ~VSW_SHARE_DDS_ACKD;
	vsharep->vs_state |= VSW_SHARE_DDS_SENT;
	mutex_exit(&vswp->hw_lock);

	/* DERR is used so this message prints by default */
	DERR(vswp, "Share allocated for ldc_id=0x%lx Cookie=0x%lX",
	    ldcp->ldc_id, vsharep->vs_cookie);

	D1(vswp, "%s:exit ldc=0x%lx", __func__, ldcp->ldc_id);
}

/*
 * vsw_hio_stop -- Stop/clean up the HybridIO config for a guest (given LDC).
 */
void
vsw_hio_stop(vsw_t *vswp, vsw_ldc_t *ldcp)
{
	vsw_share_t *vsharep;

	D1(vswp, "%s:enter ldc=0x%lx", __func__, ldcp->ldc_id);

	mutex_enter(&vswp->hw_lock);
	vsharep = vsw_hio_find_vshare_ldcid(vswp, ldcp->ldc_id);
	if (vsharep == NULL) {
		D1(vswp, "%s:no share found for ldc=0x%lx",
		    __func__, ldcp->ldc_id);
		mutex_exit(&vswp->hw_lock);
		return;
	}
	vsw_hio_free_share(vsharep);
	mutex_exit(&vswp->hw_lock);

	D1(vswp, "%s:exit ldc=0x%lx", __func__, ldcp->ldc_id);
}

/*
 * vsw_hio_send_delshare_msg -- Send a DEL_SHARE message to the guest.
 */
static int
vsw_hio_send_delshare_msg(vsw_share_t *vsharep)
{
	vsw_t *vswp = vsharep->vs_vswp;
	vsw_port_t *portp;
	vsw_ldc_list_t	*ldcl;
	vsw_ldc_t	*ldcp;
	uint32_t	req_id;
	uint64_t	cookie = vsharep->vs_cookie;
	uint64_t	macaddr = vsharep->vs_macaddr;
	int		rv;

	ASSERT(MUTEX_HELD(&vswp->hw_lock));
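	/*
	 * Drop hw_lock while sending the message: the send path takes the
	 * port's LDC list rwlock, so hw_lock is released first and
	 * re-acquired before returning.
	 */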
	mutex_exit(&vswp->hw_lock);

	portp = vsharep->vs_portp;
	if (portp == NULL) {
		mutex_enter(&vswp->hw_lock);
		return (0);
	}

	ldcl = &portp->p_ldclist;
	READ_ENTER(&ldcl->lockrw);
	ldcp = ldcl->head;
	if ((ldcp == NULL) || (ldcp->ldc_id != vsharep->vs_ldcid)) {
		RW_EXIT(&ldcl->lockrw);
		mutex_enter(&vswp->hw_lock);
		return (0);
	}
	req_id = VSW_DDS_NEXT_REQID(vsharep);
	rv = vsw_send_dds_msg(ldcp, DDS_VNET_DEL_SHARE,
	    cookie, macaddr, req_id);

	RW_EXIT(&ldcl->lockrw);
	mutex_enter(&vswp->hw_lock);
	if (rv == 0) {
		vsharep->vs_state &= ~VSW_SHARE_DDS_ACKD;
		vsharep->vs_state |= VSW_SHARE_DDS_SENT;
	}
	return (rv);
}

/*
 * vsw_send_dds_msg -- Send a DDS message.
 */
static int
vsw_send_dds_msg(vsw_ldc_t *ldcp, uint8_t dds_subclass, uint64_t cookie,
    uint64_t macaddr, uint32_t req_id)
{
	vsw_t *vswp = ldcp->ldc_port->p_vswp;
	vio_dds_msg_t	vmsg;
	dds_share_msg_t	*smsg = &vmsg.msg.share_msg;
	int rv;

	D1(vswp, "%s:enter\n", __func__);
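	/* Build a VIO control message carrying the DDS share payload */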
	vmsg.tag.vio_msgtype = VIO_TYPE_CTRL;
	vmsg.tag.vio_subtype = VIO_SUBTYPE_INFO;
	vmsg.tag.vio_subtype_env = VIO_DDS_INFO;
	vmsg.tag.vio_sid = ldcp->local_session;
	vmsg.dds_class = DDS_VNET_NIU;
	vmsg.dds_subclass = dds_subclass;
	vmsg.dds_req_id = req_id;
	smsg->macaddr = macaddr;
	smsg->cookie = cookie;
	rv = vsw_send_msg(ldcp, &vmsg, sizeof (vmsg), B_FALSE);
	D1(vswp, "%s:exit rv=%d\n", __func__, rv);
	return (rv);
}

/*
 * vsw_process_dds_msg -- Process a DDS message received from a guest.
 */
void
vsw_process_dds_msg(vsw_t *vswp, vsw_ldc_t *ldcp, void *msg)
{
	vsw_share_t	*vsharep;
	vio_dds_msg_t	*dmsg = msg;

	D1(vswp, "%s:enter ldc=0x%lx\n", __func__, ldcp->ldc_id);
	if (dmsg->dds_class != DDS_VNET_NIU) {
		/* discard */
		return;
	}
	mutex_enter(&vswp->hw_lock);
	/*
	 * We expect to receive DDS messages only from guests that
	 * have HybridIO started.
	 */
	vsharep = vsw_hio_find_vshare_ldcid(vswp, ldcp->ldc_id);
	if (vsharep == NULL) {
		mutex_exit(&vswp->hw_lock);
		return;
	}

	switch (dmsg->dds_subclass) {
	case DDS_VNET_ADD_SHARE:
		/* A response for ADD_SHARE message. */
		D1(vswp, "%s:DDS_VNET_ADD_SHARE\n", __func__);
		if (!(vsharep->vs_state & VSW_SHARE_DDS_SENT)) {
			DWARN(vswp, "%s: invalid ADD_SHARE response message "
			    "share state=0x%X", __func__, vsharep->vs_state);
			break;
		}

		if (dmsg->dds_req_id != vsharep->vs_req_id) {
			DWARN(vswp, "%s: invalid req_id in ADD_SHARE response"
			    " message req_id=0x%X share's req_id=0x%X",
			    __func__, dmsg->dds_req_id, vsharep->vs_req_id);
			break;
		}

		if (dmsg->tag.vio_subtype == VIO_SUBTYPE_NACK) {
			DWARN(vswp, "%s: NACK received for ADD_SHARE"
			    " message ldcid=0x%lx", __func__, ldcp->ldc_id);
			/* cleanup for NACK */
			vsw_hio_free_share(vsharep);
		} else {
			D2(vswp, "%s: ACK received for ADD_SHARE", __func__);
			vsharep->vs_state &= ~VSW_SHARE_DDS_SENT;
			vsharep->vs_state |= VSW_SHARE_DDS_ACKD;
		}
		break;

	case DDS_VNET_DEL_SHARE:
		/* A response for DEL_SHARE message */
		D1(vswp, "%s:DDS_VNET_DEL_SHARE\n", __func__);
		if (!(vsharep->vs_state & VSW_SHARE_DDS_SENT)) {
			DWARN(vswp, "%s: invalid DEL_SHARE response message "
			    "share state=0x%X", __func__, vsharep->vs_state);
			break;
		}

		if (dmsg->dds_req_id != vsharep->vs_req_id) {
			DWARN(vswp, "%s: invalid req_id in DEL_SHARE response"
			    " message req_id=0x%X share's req_id=0x%X",
			    __func__, dmsg->dds_req_id, vsharep->vs_req_id);
			break;
		}
		if (dmsg->tag.vio_subtype == VIO_SUBTYPE_NACK) {
			DWARN(vswp, "%s: NACK received for DEL_SHARE",
			    __func__);
		}

		/* There is nothing more we can do, so free the share now */
		vsw_hio_free_share(vsharep);
		break;

	case DDS_VNET_REL_SHARE:
		/* Guest has released the Share voluntarily, so free it now */
		D1(vswp, "%s:DDS_VNET_REL_SHARE\n", __func__);
		/* send ACK */
		(void) vsw_send_dds_resp_msg(ldcp, dmsg, B_TRUE);
		vsw_hio_free_share(vsharep);
		break;
	default:
		DERR(vswp, "%s: Invalid DDS message type=0x%X",
		    __func__, dmsg->dds_subclass);
		break;
	}
	mutex_exit(&vswp->hw_lock);
	D1(vswp, "%s:exit ldc=0x%lx\n", __func__, ldcp->ldc_id);
}

/*
 * vsw_send_dds_resp_msg -- Send a DDS response message.
 */
static int
vsw_send_dds_resp_msg(vsw_ldc_t *ldcp, vio_dds_msg_t *dmsg, int ack)
{
	vsw_t	*vswp = ldcp->ldc_port->p_vswp;
	int	rv;

	D1(vswp, "%s:enter\n", __func__);
	if (ack == B_TRUE) {
		dmsg->tag.vio_subtype = VIO_SUBTYPE_ACK;
		dmsg->msg.share_resp_msg.status = DDS_VNET_SUCCESS;
	} else {
		dmsg->tag.vio_subtype = VIO_SUBTYPE_NACK;
		dmsg->msg.share_resp_msg.status = DDS_VNET_FAIL;
	}
	rv = vsw_send_msg(ldcp, dmsg, sizeof (vio_dds_msg_t), B_FALSE);
	D1(vswp, "%s:exit rv=%d\n", __func__, rv);
	return (rv);
}

/*
 * vsw_hio_port_update -- Update the Hybrid mode for a port.
 */
void
vsw_hio_port_update(vsw_port_t *portp, boolean_t hio_enabled)
{
	/* Verify if the mode really changed */
	if (portp->p_hio_enabled == hio_enabled) {
		return;
	}

	if (hio_enabled == B_FALSE) {
		/* Hybrid Mode is disabled, so stop HybridIO */
		vsw_hio_stop_port(portp);
		portp->p_hio_enabled = B_FALSE;
	} else {
		portp->p_hio_enabled = B_TRUE;
		/* reset the port to initiate HybridIO setup */
		vsw_hio_port_reset(portp, B_FALSE);
	}
}

/*
 * vsw_hio_stop_port -- Stop HybridIO for a given port. The sequence
 *	followed is similar to vsw_hio_free_all_shares().
 */
void
vsw_hio_stop_port(vsw_port_t *portp)
{
	vsw_t *vswp = portp->p_vswp;
	vsw_share_t *vsharep;
	int max_retries = vsw_hio_max_cleanup_retries;

	D1(vswp, "%s:enter\n", __func__);
	mutex_enter(&vswp->hw_lock);

	if (vswp->hio_capable == B_FALSE) {
		mutex_exit(&vswp->hw_lock);
		return;
	}

	vsharep = vsw_hio_find_vshare_port(vswp, portp);
	if (vsharep == NULL) {
		mutex_exit(&vswp->hw_lock);
		return;
	}

	do {
		if (vsharep->vs_state & VSW_SHARE_DDS_ACKD) {
			int rv;

			/* send DDS_DEL_SHARE */
			D1(vswp, "%s:sending DEL_SHARE msg for "
			    "share(%d)", __func__, vsharep->vs_index);
			rv = vsw_hio_send_delshare_msg(vsharep);
			if (rv != 0) {
				/*
				 * Cause a port reset to trigger
				 * cleanup.
				 */
				vsw_hio_port_reset(vsharep->vs_portp, B_FALSE);
			}
		}
		if (max_retries == 1) {
			/* last retry */
			DWARN(vswp, "%s:All retries failed, "
			    "cause a reset to trigger cleanup for "
			    "share(%d)", __func__, vsharep->vs_index);
			vsw_hio_port_reset(vsharep->vs_portp, B_FALSE);
		}

		/* Check if the share is still assigned to this port */
		if ((vsharep->vs_portp != portp) ||
		    (vsharep->vs_state == VSW_SHARE_FREE)) {
			break;
		}

		/*
		 * Release the lock so that replies to DEL_SHARE
		 * messages can arrive and be processed, that is,
		 * shares get freed.
		 */
		mutex_exit(&vswp->hw_lock);
		drv_usecwait(vsw_hio_cleanup_delay);
		mutex_enter(&vswp->hw_lock);

		/* Check if the share is still assigned to this port */
		if ((vsharep->vs_portp != portp) ||
		    (vsharep->vs_state == VSW_SHARE_FREE)) {
			break;
		}
		max_retries--;
	} while ((vsharep->vs_state != VSW_SHARE_FREE) && (max_retries > 0));

	mutex_exit(&vswp->hw_lock);
	D1(vswp, "%s:exit\n", __func__);
}

/*
 * vsw_hio_reset_all -- Reset all ports that have shares allocated.
 *	It is called only in the panic code path, so the LDC channels
 *	are reset immediately.
 */
static void
vsw_hio_reset_all(vsw_t *vswp)
{
	vsw_hio_t	*hiop = &vswp->vhio;
	vsw_share_t	*vsharep;
	int		i;

	D1(vswp, "%s:enter\n", __func__);

	if (vswp->hio_capable != B_TRUE)
		return;

	for (i = 0; i < hiop->vh_num_shares; i++) {
		vsharep = &hiop->vh_shares[i];
		if (vsharep->vs_state == VSW_SHARE_FREE) {
			continue;
		}
		/*
		 * Reset the port with the immediate flag enabled,
		 * to cause an LDC reset immediately.
		 */
		vsw_hio_port_reset(vsharep->vs_portp, B_TRUE);
	}
	D1(vswp, "%s:exit\n", __func__);
}

/*
 * vsw_hio_reboot_callb -- Called for the reboot event. It tries to
 *	free all currently allocated shares.
 */
/* ARGSUSED */
static boolean_t
vsw_hio_reboot_callb(void *arg, int code)
{
	vsw_t *vswp = arg;

	D1(vswp, "%s:enter\n", __func__);
	vsw_hio_free_all_shares(vswp, B_TRUE);
	D1(vswp, "%s:exit\n", __func__);
	return (B_TRUE);
}

/*
 * vsw_hio_panic_callb -- Called for the panic event. It resets all
 *	the ports that have shares allocated. This is done to
 *	trigger the cleanup in the guest ahead of the HV reset.
 */
/* ARGSUSED */
static boolean_t
vsw_hio_panic_callb(void *arg, int code)
{
	vsw_t *vswp = arg;

	D1(vswp, "%s:enter\n", __func__);
	vsw_hio_reset_all(vswp);
	D1(vswp, "%s:exit\n", __func__);
	return (B_TRUE);
}