xref: /titanic_51/usr/src/uts/sun4v/io/vsw_hio.c (revision 2225707c7e7edf7c636ed349df2592ef85329cdd)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #include <sys/types.h>
28 #include <sys/errno.h>
29 #include <sys/debug.h>
30 #include <sys/time.h>
31 #include <sys/sysmacros.h>
32 #include <sys/systm.h>
33 #include <sys/user.h>
34 #include <sys/stropts.h>
35 #include <sys/stream.h>
36 #include <sys/strlog.h>
37 #include <sys/strsubr.h>
38 #include <sys/cmn_err.h>
39 #include <sys/cpu.h>
40 #include <sys/kmem.h>
41 #include <sys/conf.h>
42 #include <sys/ddi.h>
43 #include <sys/sunddi.h>
44 #include <sys/ksynch.h>
45 #include <sys/stat.h>
46 #include <sys/kstat.h>
47 #include <sys/vtrace.h>
48 #include <sys/strsun.h>
49 #include <sys/dlpi.h>
50 #include <sys/ethernet.h>
51 #include <net/if.h>
52 #include <sys/varargs.h>
53 #include <sys/machsystm.h>
54 #include <sys/modctl.h>
55 #include <sys/modhash.h>
56 #include <sys/mac_provider.h>
57 #include <sys/mac_ether.h>
58 #include <sys/taskq.h>
59 #include <sys/note.h>
60 #include <sys/mach_descrip.h>
61 #include <sys/mac.h>
62 #include <sys/mdeg.h>
63 #include <sys/ldc.h>
64 #include <sys/vsw_fdb.h>
65 #include <sys/vsw.h>
66 #include <sys/vio_mailbox.h>
67 #include <sys/vnet_mailbox.h>
68 #include <sys/vnet_common.h>
69 #include <sys/vio_util.h>
70 #include <sys/sdt.h>
71 #include <sys/atomic.h>
72 #include <sys/callb.h>
73 
74 
/*
 * Advance the request id stored in a share and yield the new value.
 * The argument is parenthesized so that any pointer-valued expression
 * (not just a plain identifier) can be passed.
 */
#define	VSW_DDS_NEXT_REQID(vsharep)	(++(vsharep)->vs_req_id)
76 
77 extern boolean_t vsw_hio_enabled;		/* HybridIO enabled? */
78 extern int vsw_hio_max_cleanup_retries;
79 extern int vsw_hio_cleanup_delay;
80 
81 /* Functions imported from other files */
82 extern int vsw_send_msg(vsw_ldc_t *, void *, int, boolean_t);
83 extern void vsw_hio_port_reset(vsw_port_t *portp, boolean_t immediate);
84 extern void vsw_port_mac_reconfig(vsw_port_t *portp, boolean_t update_vlans,
85     uint16_t new_pvid, vsw_vlanid_t *new_vids, int new_nvids);
86 
87 /* Functions exported to other files */
88 void vsw_hio_init(vsw_t *vswp);
89 void vsw_hio_cleanup(vsw_t *vswp);
90 void vsw_hio_start(vsw_t *vswp, vsw_ldc_t *ldcp);
91 void vsw_hio_stop(vsw_t *vswp, vsw_ldc_t *ldcp);
92 void vsw_process_dds_msg(vsw_t *vswp, vsw_ldc_t *ldcp, void *msg);
93 void vsw_hio_start_ports(vsw_t *vswp);
94 void vsw_hio_stop_port(vsw_port_t *portp);
95 
96 /* Support functions */
97 static void vsw_hio_free_all_shares(vsw_t *vswp, boolean_t reboot);
98 static vsw_share_t *vsw_hio_alloc_share(vsw_t *vswp, vsw_ldc_t *ldcp);
99 static void vsw_hio_free_share(vsw_share_t *vsharep);
100 static vsw_share_t *vsw_hio_find_free_share(vsw_t *vswp);
101 static vsw_share_t *vsw_hio_find_vshare_ldcid(vsw_t *vswp, uint64_t ldc_id);
102 static vsw_share_t *vsw_hio_find_vshare_port(vsw_t *vswp, vsw_port_t *portp);
103 static int vsw_send_dds_msg(vsw_ldc_t *ldcp, uint8_t dds_subclass,
104     uint64_t cookie, uint64_t macaddr, uint32_t req_id);
105 static int vsw_send_dds_resp_msg(vsw_ldc_t *ldcp, vio_dds_msg_t *dmsg, int ack);
106 static int vsw_hio_send_delshare_msg(vsw_share_t *vsharep);
107 static boolean_t vsw_hio_reboot_callb(void *arg, int code);
108 static boolean_t vsw_hio_panic_callb(void *arg, int code);
109 
/*
 * The locking strategy for HybridIO is as follows:
 *
 *	- As the Shares are associated with a network device, the
 *	  global lock ('vswp->mac_lock') is used for all Share
 *	  related operations.
 *	- The 'port->maccl_rwlock' is used to synchronize only the
 *	  operations that operate on that port's mac client, that
 *	  is, the share bind and unbind operations.
 *
 *	- The locking hierarchy requires that the global mac_lock be
 *	  acquired first and then the port's mac client lock (maccl_rwlock).
 */
123 
124 
125 static kstat_t *vsw_hio_setup_kstats(char *ks_mod, char *ks_name, vsw_t *vswp);
126 static void vsw_hio_destroy_kstats(vsw_t *vswp);
127 static int vsw_hio_kstats_update(kstat_t *ksp, int rw);
128 
129 /*
130  * vsw_hio_init -- Initialize the HybridIO related info.
131  *	- Query SHARES and RINGS capability. Both capabilities
132  *	  need to be supported by the physical-device.
133  */
134 void
135 vsw_hio_init(vsw_t *vswp)
136 {
137 	vsw_hio_t	*hiop = &vswp->vhio;
138 	int		num_shares;
139 	int		i;
140 
141 	ASSERT(MUTEX_HELD(&vswp->mac_lock));
142 	D1(vswp, "%s:enter\n", __func__);
143 	if (vsw_hio_enabled == B_FALSE) {
144 		return;
145 	}
146 
147 	vswp->hio_capable = B_FALSE;
148 	num_shares = mac_share_capable(vswp->mh);
149 	if (num_shares == 0) {
150 		D2(vswp, "%s: %s is not HybridIO capable\n", __func__,
151 		    vswp->physname);
152 		return;
153 	}
154 	hiop->vh_num_shares = num_shares;
155 	hiop->vh_shares = kmem_zalloc((sizeof (vsw_share_t) *
156 	    hiop->vh_num_shares), KM_SLEEP);
157 	for (i = 0; i < hiop->vh_num_shares; i++) {
158 		hiop->vh_shares[i].vs_state = VSW_SHARE_FREE;
159 		hiop->vh_shares[i].vs_index = i;
160 		hiop->vh_shares[i].vs_vswp = vswp;
161 	}
162 	vswp->hio_capable = B_TRUE;
163 
164 	/*
165 	 * Register to get reboot and panic events so that
166 	 * we can cleanup HybridIO resources gracefully.
167 	 */
168 	vswp->hio_reboot_cb_id = callb_add(vsw_hio_reboot_callb,
169 	    (void *)vswp, CB_CL_MDBOOT, "vsw_hio");
170 
171 	vswp->hio_panic_cb_id = callb_add(vsw_hio_panic_callb,
172 	    (void *)vswp, CB_CL_PANIC, "vsw_hio");
173 
174 	/* setup kstats for hybrid resources */
175 	hiop->vh_ksp = vsw_hio_setup_kstats(DRV_NAME, "hio", vswp);
176 	if (hiop->vh_ksp == NULL) {
177 		DERR(vswp, "%s: kstats setup failed", __func__);
178 	}
179 
180 	D2(vswp, "%s: %s is HybridIO capable num_shares=%d\n", __func__,
181 	    vswp->physname, hiop->vh_num_shares);
182 	D1(vswp, "%s:exit\n", __func__);
183 }
184 
185 /*
186  * vsw_hio_alloc_share -- Allocate and setup the share for a guest domain.
187  *	- Allocate a free share.
188  *	- Bind the Guest's MAC address.
189  */
190 static vsw_share_t *
191 vsw_hio_alloc_share(vsw_t *vswp, vsw_ldc_t *ldcp)
192 {
193 	vsw_share_t	*vsharep;
194 	vsw_port_t	*portp = ldcp->ldc_port;
195 	uint64_t	ldc_id = ldcp->ldc_id;
196 	int		rv;
197 
198 	D1(vswp, "%s:enter\n", __func__);
199 	vsharep = vsw_hio_find_free_share(vswp);
200 	if (vsharep == NULL) {
201 		/* No free shares available */
202 		return (NULL);
203 	}
204 
205 	WRITE_ENTER(&portp->maccl_rwlock);
206 	rv = mac_share_bind(portp->p_mch, ldc_id, &vsharep->vs_cookie);
207 	RW_EXIT(&portp->maccl_rwlock);
208 	if (rv != 0) {
209 		return (NULL);
210 	}
211 
212 	/* Cache some useful info */
213 	vsharep->vs_ldcid = ldcp->ldc_id;
214 	vsharep->vs_macaddr = vnet_macaddr_strtoul(
215 	    portp->p_macaddr.ether_addr_octet);
216 	vsharep->vs_portp = ldcp->ldc_port;
217 	vsharep->vs_state |= VSW_SHARE_ASSIGNED;
218 
219 	D1(vswp, "%s:exit\n", __func__);
220 	return (vsharep);
221 }
222 
223 /*
224  * vsw_hio_find_free_share -- Find a free Share.
225  */
226 static vsw_share_t *
227 vsw_hio_find_free_share(vsw_t *vswp)
228 {
229 	vsw_hio_t *hiop = &vswp->vhio;
230 	vsw_share_t *vsharep;
231 	int i;
232 
233 	D1(vswp, "%s:enter\n", __func__);
234 	for (i = 0; i < hiop->vh_num_shares; i++) {
235 		vsharep = &hiop->vh_shares[i];
236 		if (vsharep->vs_state == VSW_SHARE_FREE) {
237 			D1(vswp, "%s:Returning free share(%d)\n",
238 			    __func__, vsharep->vs_index);
239 			return (vsharep);
240 		}
241 	}
242 	D1(vswp, "%s:no free share\n", __func__);
243 	return (NULL);
244 }
245 
246 /*
247  * vsw_hio_find_vshare_ldcid -- Given ldc_id, find the corresponding
248  *	share structure.
249  */
250 static vsw_share_t *
251 vsw_hio_find_vshare_ldcid(vsw_t *vswp, uint64_t ldc_id)
252 {
253 	vsw_hio_t *hiop = &vswp->vhio;
254 	vsw_share_t *vsharep;
255 	int i;
256 
257 	D1(vswp, "%s:enter, ldc=0x%lx", __func__, ldc_id);
258 	for (i = 0; i < hiop->vh_num_shares; i++) {
259 		vsharep = &hiop->vh_shares[i];
260 		if (vsharep->vs_state == VSW_SHARE_FREE) {
261 			continue;
262 		}
263 		if (vsharep->vs_ldcid == ldc_id) {
264 			D1(vswp, "%s:returning share(%d)",
265 			    __func__, vsharep->vs_index);
266 			return (vsharep);
267 		}
268 	}
269 	D1(vswp, "%s:returning NULL", __func__);
270 	return (NULL);
271 }
272 
273 /*
274  * vsw_hio_find_vshare_port -- Given portp, find the corresponding
275  *	share structure.
276  */
277 static vsw_share_t *
278 vsw_hio_find_vshare_port(vsw_t *vswp, vsw_port_t *portp)
279 {
280 	vsw_hio_t *hiop = &vswp->vhio;
281 	vsw_share_t *vsharep;
282 	int i;
283 
284 	D1(vswp, "%s:enter, portp=0x%p", __func__, portp);
285 	for (i = 0; i < hiop->vh_num_shares; i++) {
286 		vsharep = &hiop->vh_shares[i];
287 		if (vsharep->vs_state == VSW_SHARE_FREE) {
288 			continue;
289 		}
290 		if (vsharep->vs_portp == portp) {
291 			D1(vswp, "%s:returning share(%d)",
292 			    __func__, vsharep->vs_index);
293 			return (vsharep);
294 		}
295 	}
296 	D1(vswp, "%s:returning NULL", __func__);
297 	return (NULL);
298 }
299 
300 /*
301  * vsw_hio_free_share -- Unbind the MAC address and free share.
302  */
303 static void
304 vsw_hio_free_share(vsw_share_t *vsharep)
305 {
306 	vsw_t		*vswp = vsharep->vs_vswp;
307 	vsw_port_t	*portp = vsharep->vs_portp;
308 
309 	D1(vswp, "%s:enter\n", __func__);
310 
311 	WRITE_ENTER(&portp->maccl_rwlock);
312 	mac_share_unbind(portp->p_mch);
313 	RW_EXIT(&portp->maccl_rwlock);
314 	vsharep->vs_state = VSW_SHARE_FREE;
315 	vsharep->vs_macaddr = 0;
316 	vsharep->vs_portp = NULL;
317 
318 	/* DERR only for printing by default */
319 	DERR(vswp, "Share freed for ldc_id=0x%lx Cookie=0x%lX",
320 	    vsharep->vs_ldcid, vsharep->vs_cookie);
321 	D1(vswp, "%s:exit\n", __func__);
322 }
323 
324 
325 /*
326  * vsw_hio_cleanup -- Cleanup the HybridIO. It unregisters the callbs
327  *	and frees all shares.
328  */
329 void
330 vsw_hio_cleanup(vsw_t *vswp)
331 {
332 	D1(vswp, "%s:enter\n", __func__);
333 
334 	/* Unregister reboot and panic callbs. */
335 	if (vswp->hio_reboot_cb_id) {
336 		(void) callb_delete(vswp->hio_reboot_cb_id);
337 		vswp->hio_reboot_cb_id = 0;
338 	}
339 	if (vswp->hio_panic_cb_id) {
340 		(void) callb_delete(vswp->hio_panic_cb_id);
341 		vswp->hio_panic_cb_id = 0;
342 	}
343 	vsw_hio_free_all_shares(vswp, B_FALSE);
344 	vsw_hio_destroy_kstats(vswp);
345 	D1(vswp, "%s:exit\n", __func__);
346 }
347 
348 /*
349  * vsw_hio_free_all_shares -- A routine to free all shares gracefully.
350  *	The following are the steps followed to accomplish this:
351  *
352  *	- First clear 'hio_capable' to avoid further share allocations.
353  *	- If a share is in accepted(ACKD) state, that means the guest
354  *	  has HybridIO setup etc. If so, send a DEL_SHARE message and
355  *	  give some time(delay) for the guest to ACK.
356  *	- If the Share is another state, give some time to transition to
357  *	  ACKD state, then try the above.
358  *	- After max retries, reset the ports to brute force the shares
359  *	  to be freed. Give a little delay for the LDC reset code to
360  *	  free the Share.
361  */
362 static void
363 vsw_hio_free_all_shares(vsw_t *vswp, boolean_t reboot)
364 {
365 	vsw_hio_t	*hiop = &vswp->vhio;
366 	vsw_port_list_t	*plist = &vswp->plist;
367 	vsw_share_t	*vsharep;
368 	int		free_shares = 0;
369 	int		max_retries = vsw_hio_max_cleanup_retries;
370 	int		i;
371 
372 	D1(vswp, "%s:enter\n", __func__);
373 
374 	/*
375 	 * Acquire plist->lockrw to make the locking a bit easier
376 	 * and keep the ports in a stable state while we are cleaningup
377 	 * HybridIO.
378 	 */
379 	READ_ENTER(&plist->lockrw);
380 	mutex_enter(&vswp->mac_lock);
381 	/*
382 	 * first clear the hio_capable flag so that no more
383 	 * HybridIO operations are initiated.
384 	 */
385 	vswp->hio_capable = B_FALSE;
386 
387 	do {
388 		free_shares = 0;
389 		for (i = 0; i < hiop->vh_num_shares; i++) {
390 			vsharep = &hiop->vh_shares[i];
391 			if (vsharep->vs_state == VSW_SHARE_FREE) {
392 				free_shares++;
393 				continue;
394 			}
395 			/*
396 			 * If the share is in DDS_ACKD state, then
397 			 * send DEL_SHARE message so that guest can
398 			 * release its Hybrid resource.
399 			 */
400 			if (vsharep->vs_state & VSW_SHARE_DDS_ACKD) {
401 				int rv;
402 
403 				/* send DDS_DEL_SHARE */
404 				D1(vswp, "%s:sending DEL_SHARE msg for "
405 				    "share(%d)", __func__, vsharep->vs_index);
406 				rv = vsw_hio_send_delshare_msg(vsharep);
407 				if (rv != 0) {
408 					/*
409 					 * No alternative, reset the port
410 					 * to force the release of Hybrid
411 					 * resources.
412 					 */
413 					vsw_hio_port_reset(vsharep->vs_portp,
414 					    B_FALSE);
415 				}
416 			}
417 			if (max_retries == 1) {
418 				/*
419 				 * Last retry,  reset the port.
420 				 * If it is reboot case, issue an immediate
421 				 * reset.
422 				 */
423 				DWARN(vswp, "%s:All retries failed, "
424 				    " cause a reset to trigger cleanup for "
425 				    "share(%d)", __func__, vsharep->vs_index);
426 				vsw_hio_port_reset(vsharep->vs_portp, reboot);
427 			}
428 		}
429 		if (free_shares == hiop->vh_num_shares) {
430 			/* Clean up is done */
431 			break;
432 		}
433 		/*
434 		 * Release the lock so that reply for DEL_SHARE
435 		 * messages come and get processed, that is, shares
436 		 * get freed.
437 		 * This delay is also needed for the port reset to
438 		 * release the Hybrid resource.
439 		 */
440 		mutex_exit(&vswp->mac_lock);
441 		drv_usecwait(vsw_hio_cleanup_delay);
442 		mutex_enter(&vswp->mac_lock);
443 		max_retries--;
444 	} while ((free_shares < hiop->vh_num_shares) && (max_retries > 0));
445 
446 	/* By now, all shares should be freed */
447 	if (free_shares != hiop->vh_num_shares) {
448 		if (reboot == B_FALSE) {
449 			cmn_err(CE_NOTE, "vsw%d: All physical resources "
450 			    "could not be freed", vswp->instance);
451 		}
452 	}
453 
454 	kmem_free(hiop->vh_shares, sizeof (vsw_share_t) * hiop->vh_num_shares);
455 	hiop->vh_shares = NULL;
456 	hiop->vh_num_shares = 0;
457 	mutex_exit(&vswp->mac_lock);
458 	RW_EXIT(&plist->lockrw);
459 	D1(vswp, "%s:exit\n", __func__);
460 }
461 
462 /*
463  * vsw_hio_start_ports -- Start HybridIO for ports that have
464  *	already established connection before HybridIO is intialized.
465  */
466 void
467 vsw_hio_start_ports(vsw_t *vswp)
468 {
469 	vsw_port_list_t	*plist = &vswp->plist;
470 	vsw_port_t	*portp;
471 	vsw_share_t	*vsharep;
472 	boolean_t	reset;
473 
474 	if (vswp->hio_capable == B_FALSE) {
475 		return;
476 	}
477 	READ_ENTER(&plist->lockrw);
478 	for (portp = plist->head; portp != NULL; portp = portp->p_next) {
479 		if ((portp->p_hio_enabled == B_FALSE) ||
480 		    (portp->p_hio_capable == B_FALSE)) {
481 			continue;
482 		}
483 
484 		reset = B_FALSE;
485 		mutex_enter(&vswp->mac_lock);
486 		vsharep = vsw_hio_find_vshare_port(vswp, portp);
487 		if (vsharep == NULL) {
488 			reset = B_TRUE;
489 		}
490 		mutex_exit(&vswp->mac_lock);
491 
492 		if (reset == B_TRUE) {
493 			/* Cause a rest to trigger HybridIO setup */
494 			vsw_hio_port_reset(portp, B_FALSE);
495 		}
496 	}
497 	RW_EXIT(&plist->lockrw);
498 }
499 
500 /*
501  * vsw_hio_start -- Start HybridIO for a guest(given LDC)
502  */
503 void
504 vsw_hio_start(vsw_t *vswp, vsw_ldc_t *ldcp)
505 {
506 	vsw_share_t	*vsharep;
507 	uint32_t	req_id;
508 	int		rv;
509 
510 	D1(vswp, "%s:enter ldc=0x%lx", __func__, ldcp->ldc_id);
511 	mutex_enter(&vswp->mac_lock);
512 	if (vswp->hio_capable == B_FALSE) {
513 		mutex_exit(&vswp->mac_lock);
514 		D2(vswp, "%s:not HIO capable", __func__);
515 		return;
516 	}
517 
518 	/* Verify if a share was already allocated */
519 	vsharep = vsw_hio_find_vshare_ldcid(vswp, ldcp->ldc_id);
520 	if (vsharep != NULL) {
521 		mutex_exit(&vswp->mac_lock);
522 		D2(vswp, "%s:Share already allocated to ldc=0x%lx",
523 		    __func__, ldcp->ldc_id);
524 		return;
525 	}
526 	vsharep = vsw_hio_alloc_share(vswp, ldcp);
527 	if (vsharep == NULL) {
528 		mutex_exit(&vswp->mac_lock);
529 		D2(vswp, "%s: no Share available for ldc=0x%lx",
530 		    __func__, ldcp->ldc_id);
531 		return;
532 	}
533 	req_id = VSW_DDS_NEXT_REQID(vsharep);
534 	rv = vsw_send_dds_msg(ldcp, DDS_VNET_ADD_SHARE, vsharep->vs_cookie,
535 	    vsharep->vs_macaddr, req_id);
536 	if (rv != 0) {
537 		/*
538 		 * Failed to send a DDS message, so cleanup now.
539 		 */
540 		vsw_hio_free_share(vsharep);
541 		mutex_exit(&vswp->mac_lock);
542 		return;
543 	}
544 	vsharep->vs_state &= ~VSW_SHARE_DDS_ACKD;
545 	vsharep->vs_state |= VSW_SHARE_DDS_SENT;
546 	mutex_exit(&vswp->mac_lock);
547 
548 	/* DERR only to print by default */
549 	DERR(vswp, "Share allocated for ldc_id=0x%lx Cookie=0x%lX",
550 	    ldcp->ldc_id, vsharep->vs_cookie);
551 
552 	D1(vswp, "%s:exit ldc=0x%lx", __func__, ldcp->ldc_id);
553 }
554 
555 /*
556  * vsw_hio_stop -- Stop/clean the HybridIO config for a guest(given ldc).
557  */
558 void
559 vsw_hio_stop(vsw_t *vswp, vsw_ldc_t *ldcp)
560 {
561 	vsw_share_t *vsharep;
562 
563 	D1(vswp, "%s:enter ldc=0x%lx", __func__, ldcp->ldc_id);
564 
565 	mutex_enter(&vswp->mac_lock);
566 	vsharep = vsw_hio_find_vshare_ldcid(vswp, ldcp->ldc_id);
567 	if (vsharep == NULL) {
568 		D1(vswp, "%s:no share found for ldc=0x%lx",
569 		    __func__, ldcp->ldc_id);
570 		mutex_exit(&vswp->mac_lock);
571 		return;
572 	}
573 	vsw_hio_free_share(vsharep);
574 	mutex_exit(&vswp->mac_lock);
575 
576 	D1(vswp, "%s:exit ldc=0x%lx", __func__, ldcp->ldc_id);
577 }
578 
579 /*
580  * vsw_hio_send_delshare_msg -- Send a DEL_SHARE message to the	guest.
581  */
582 static int
583 vsw_hio_send_delshare_msg(vsw_share_t *vsharep)
584 {
585 	vsw_t *vswp = vsharep->vs_vswp;
586 	vsw_port_t *portp;
587 	vsw_ldc_list_t	*ldcl;
588 	vsw_ldc_t	*ldcp;
589 	uint32_t	req_id;
590 	uint64_t	cookie = vsharep->vs_cookie;
591 	uint64_t	macaddr = vsharep->vs_macaddr;
592 	int		rv;
593 
594 	ASSERT(MUTEX_HELD(&vswp->mac_lock));
595 	mutex_exit(&vswp->mac_lock);
596 
597 	portp = vsharep->vs_portp;
598 	if (portp == NULL) {
599 		mutex_enter(&vswp->mac_lock);
600 		return (0);
601 	}
602 
603 	ldcl = &portp->p_ldclist;
604 	READ_ENTER(&ldcl->lockrw);
605 	ldcp = ldcl->head;
606 	if ((ldcp == NULL) || (ldcp->ldc_id != vsharep->vs_ldcid)) {
607 		RW_EXIT(&ldcl->lockrw);
608 		mutex_enter(&vswp->mac_lock);
609 		return (0);
610 	}
611 	req_id = VSW_DDS_NEXT_REQID(vsharep);
612 	rv = vsw_send_dds_msg(ldcp, DDS_VNET_DEL_SHARE,
613 	    cookie, macaddr, req_id);
614 
615 	RW_EXIT(&ldcl->lockrw);
616 	mutex_enter(&vswp->mac_lock);
617 	if (rv == 0) {
618 		vsharep->vs_state &= ~VSW_SHARE_DDS_ACKD;
619 		vsharep->vs_state |= VSW_SHARE_DDS_SENT;
620 	}
621 	return (rv);
622 }
623 
624 /*
625  * vsw_send_dds_msg -- Send a DDS message.
626  */
627 static int
628 vsw_send_dds_msg(vsw_ldc_t *ldcp, uint8_t dds_subclass, uint64_t
629     cookie, uint64_t macaddr, uint32_t req_id)
630 {
631 	vsw_t *vswp = ldcp->ldc_port->p_vswp;
632 	vio_dds_msg_t	vmsg;
633 	dds_share_msg_t	*smsg = &vmsg.msg.share_msg;
634 	int rv;
635 
636 	D1(vswp, "%s:enter\n", __func__);
637 	vmsg.tag.vio_msgtype = VIO_TYPE_CTRL;
638 	vmsg.tag.vio_subtype = VIO_SUBTYPE_INFO;
639 	vmsg.tag.vio_subtype_env = VIO_DDS_INFO;
640 	vmsg.tag.vio_sid = ldcp->local_session;
641 	vmsg.dds_class = DDS_VNET_NIU;
642 	vmsg.dds_subclass = dds_subclass;
643 	vmsg.dds_req_id = req_id;
644 	smsg->macaddr = macaddr;
645 	smsg->cookie = cookie;
646 	rv = vsw_send_msg(ldcp, &vmsg, sizeof (vmsg), B_FALSE);
647 	D1(vswp, "%s:exit rv=%d\n", __func__, rv);
648 	return (rv);
649 }
650 
651 /*
652  * vsw_process_dds_msg -- Process a DDS message received from a guest.
653  */
654 void
655 vsw_process_dds_msg(vsw_t *vswp, vsw_ldc_t *ldcp, void *msg)
656 {
657 	vsw_share_t	*vsharep;
658 	vio_dds_msg_t	*dmsg = msg;
659 
660 	D1(vswp, "%s:enter ldc=0x%lx\n", __func__, ldcp->ldc_id);
661 	if (dmsg->dds_class != DDS_VNET_NIU) {
662 		/* discard */
663 		return;
664 	}
665 	mutex_enter(&vswp->mac_lock);
666 	/*
667 	 * We expect to receive DDS messages only from guests that
668 	 * have HybridIO started.
669 	 */
670 	vsharep = vsw_hio_find_vshare_ldcid(vswp, ldcp->ldc_id);
671 	if (vsharep == NULL) {
672 		mutex_exit(&vswp->mac_lock);
673 		return;
674 	}
675 
676 	switch (dmsg->dds_subclass) {
677 	case DDS_VNET_ADD_SHARE:
678 		/* A response for ADD_SHARE message. */
679 		D1(vswp, "%s:DDS_VNET_ADD_SHARE\n", __func__);
680 		if (!(vsharep->vs_state & VSW_SHARE_DDS_SENT)) {
681 			DWARN(vswp, "%s: invalid ADD_SHARE response  message "
682 			    " share state=0x%X", __func__, vsharep->vs_state);
683 			break;
684 		}
685 
686 		if (dmsg->dds_req_id != vsharep->vs_req_id) {
687 			DWARN(vswp, "%s: invalid req_id in ADD_SHARE response"
688 			    " message req_id=0x%X share's req_id=0x%X",
689 			    __func__, dmsg->dds_req_id, vsharep->vs_req_id);
690 			break;
691 		}
692 
693 		if (dmsg->tag.vio_subtype == VIO_SUBTYPE_NACK) {
694 			DWARN(vswp, "%s: NACK received for ADD_SHARE"
695 			    " message ldcid=0x%lx", __func__, ldcp->ldc_id);
696 			/* cleanup for NACK */
697 			vsw_hio_free_share(vsharep);
698 		} else {
699 			D2(vswp, "%s: ACK received for ADD_SHARE", __func__);
700 			vsharep->vs_state &= ~VSW_SHARE_DDS_SENT;
701 			vsharep->vs_state |= VSW_SHARE_DDS_ACKD;
702 		}
703 		break;
704 
705 	case DDS_VNET_DEL_SHARE:
706 		/* A response for DEL_SHARE message */
707 		D1(vswp, "%s:DDS_VNET_DEL_SHARE\n", __func__);
708 		if (!(vsharep->vs_state & VSW_SHARE_DDS_SENT)) {
709 			DWARN(vswp, "%s: invalid DEL_SHARE response message "
710 			    " share state=0x%X", __func__, vsharep->vs_state);
711 			break;
712 		}
713 
714 		if (dmsg->dds_req_id != vsharep->vs_req_id) {
715 			DWARN(vswp, "%s: invalid req_id in DEL_SHARE response"
716 			    " message share req_id=0x%X share's req_id=0x%X",
717 			    __func__, dmsg->dds_req_id, vsharep->vs_req_id);
718 			break;
719 		}
720 		if (dmsg->tag.vio_subtype == VIO_SUBTYPE_NACK) {
721 			DWARN(vswp, "%s: NACK received for DEL_SHARE",
722 			    __func__);
723 		}
724 
725 		/* There is nothing we can do, free share now */
726 		vsw_hio_free_share(vsharep);
727 		break;
728 
729 	case DDS_VNET_REL_SHARE:
730 		/* Guest has released Share voluntarily, so free it now */
731 		D1(vswp, "%s:DDS_VNET_REL_SHARE\n", __func__);
732 		/* send ACK */
733 		(void) vsw_send_dds_resp_msg(ldcp, dmsg, B_FALSE);
734 		vsw_hio_free_share(vsharep);
735 		break;
736 	default:
737 		DERR(vswp, "%s: Invalid DDS message type=0x%X",
738 		    __func__, dmsg->dds_subclass);
739 		break;
740 	}
741 	mutex_exit(&vswp->mac_lock);
742 	D1(vswp, "%s:exit ldc=0x%lx\n", __func__, ldcp->ldc_id);
743 }
744 
745 /*
746  * vsw_send_dds_resp_msg -- Send a DDS response message.
747  */
748 static int
749 vsw_send_dds_resp_msg(vsw_ldc_t *ldcp, vio_dds_msg_t *dmsg, int ack)
750 {
751 	vsw_t	*vswp = ldcp->ldc_port->p_vswp;
752 	int	rv;
753 
754 	D1(vswp, "%s:enter\n", __func__);
755 	if (ack == B_TRUE) {
756 		dmsg->tag.vio_subtype = VIO_SUBTYPE_ACK;
757 		dmsg->msg.share_resp_msg.status = DDS_VNET_SUCCESS;
758 	} else {
759 		dmsg->tag.vio_subtype = VIO_SUBTYPE_NACK;
760 		dmsg->msg.share_resp_msg.status = DDS_VNET_FAIL;
761 	}
762 	rv = vsw_send_msg(ldcp, dmsg, sizeof (vio_dds_msg_t), B_FALSE);
763 	D1(vswp, "%s:exit rv=%d\n", __func__, rv);
764 	return (rv);
765 }
766 
767 /*
768  * vsw_hio_port_update -- update Hybrid mode change for a port.
769  */
770 void
771 vsw_hio_port_update(vsw_port_t *portp, boolean_t hio_enabled)
772 {
773 	/* Verify if the mode really changed */
774 	if (portp->p_hio_enabled == hio_enabled) {
775 		return;
776 	}
777 
778 	if (hio_enabled == B_FALSE) {
779 		/* Hybrid Mode is disabled, so stop HybridIO */
780 		vsw_hio_stop_port(portp);
781 		portp->p_hio_enabled = B_FALSE;
782 
783 		vsw_port_mac_reconfig(portp, B_FALSE, 0, NULL, 0);
784 	} else {
785 		portp->p_hio_enabled =  B_TRUE;
786 		vsw_port_mac_reconfig(portp, B_FALSE, 0, NULL, 0);
787 
788 		/* reset the port to initiate HybridIO setup */
789 		vsw_hio_port_reset(portp, B_FALSE);
790 	}
791 }
792 
793 /*
794  * vsw_hio_stop_port -- Stop HybridIO for a given port. Sequence
795  *	followed is similar to vsw_hio_free_all_shares().
796  *
797  */
798 void
799 vsw_hio_stop_port(vsw_port_t *portp)
800 {
801 	vsw_t *vswp = portp->p_vswp;
802 	vsw_share_t *vsharep;
803 	int max_retries = vsw_hio_max_cleanup_retries;
804 
805 	D1(vswp, "%s:enter\n", __func__);
806 	mutex_enter(&vswp->mac_lock);
807 
808 	if (vswp->hio_capable == B_FALSE) {
809 		mutex_exit(&vswp->mac_lock);
810 		return;
811 	}
812 
813 	vsharep = vsw_hio_find_vshare_port(vswp, portp);
814 	if (vsharep == NULL) {
815 		mutex_exit(&vswp->mac_lock);
816 		return;
817 	}
818 
819 	do {
820 		if (vsharep->vs_state & VSW_SHARE_DDS_ACKD) {
821 			int rv;
822 
823 			/* send DDS_DEL_SHARE */
824 			D1(vswp, "%s:sending DEL_SHARE msg for "
825 			    "share(%d)", __func__, vsharep->vs_index);
826 			rv = vsw_hio_send_delshare_msg(vsharep);
827 			if (rv != 0) {
828 				/*
829 				 * Cause a port reset to trigger
830 				 * cleanup.
831 				 */
832 				vsw_hio_port_reset(vsharep->vs_portp, B_FALSE);
833 			}
834 		}
835 		if (max_retries == 1) {
836 			/* last retry */
837 			DWARN(vswp, "%s:All retries failed, "
838 			    " cause a reset to trigger cleanup for "
839 			    "share(%d)", __func__, vsharep->vs_index);
840 			vsw_hio_port_reset(vsharep->vs_portp, B_FALSE);
841 		}
842 
843 		/* Check if the share still assigned to this port */
844 		if ((vsharep->vs_portp != portp) ||
845 		    (vsharep->vs_state == VSW_SHARE_FREE)) {
846 			break;
847 		}
848 
849 		/*
850 		 * Release the lock so that reply for DEL_SHARE
851 		 * messages come and get processed, that is, shares
852 		 * get freed.
853 		 */
854 		mutex_exit(&vswp->mac_lock);
855 		drv_usecwait(vsw_hio_cleanup_delay);
856 		mutex_enter(&vswp->mac_lock);
857 
858 		/* Check if the share still assigned to this port */
859 		if ((vsharep->vs_portp != portp) ||
860 		    (vsharep->vs_state == VSW_SHARE_FREE)) {
861 			break;
862 		}
863 		max_retries--;
864 	} while ((vsharep->vs_state != VSW_SHARE_FREE) && (max_retries > 0));
865 
866 	mutex_exit(&vswp->mac_lock);
867 	D1(vswp, "%s:exit\n", __func__);
868 }
869 
870 /*
871  * vsw_hio_rest_all -- Resets all ports that have shares allocated.
872  *	It is called only in the panic code path, so the LDC channels
873  *	are reset immediately.
874  */
875 static void
876 vsw_hio_reset_all(vsw_t *vswp)
877 {
878 	vsw_hio_t	*hiop = &vswp->vhio;
879 	vsw_share_t	*vsharep;
880 	int		i;
881 
882 	D1(vswp, "%s:enter\n", __func__);
883 
884 	if (vswp->hio_capable != B_TRUE)
885 		return;
886 
887 	for (i = 0; i < hiop->vh_num_shares; i++) {
888 		vsharep = &hiop->vh_shares[i];
889 		if (vsharep->vs_state == VSW_SHARE_FREE) {
890 			continue;
891 		}
892 		/*
893 		 * Reset the port with immediate flag enabled,
894 		 * to cause LDC reset immediately.
895 		 */
896 		vsw_hio_port_reset(vsharep->vs_portp, B_TRUE);
897 	}
898 	D1(vswp, "%s:exit\n", __func__);
899 }
900 
901 /*
902  * vsw_hio_reboot_callb -- Called for reboot event. It tries to
903  *	free all currently allocated shares.
904  */
905 /* ARGSUSED */
906 static boolean_t
907 vsw_hio_reboot_callb(void *arg, int code)
908 {
909 	vsw_t *vswp = arg;
910 
911 	D1(vswp, "%s:enter\n", __func__);
912 	vsw_hio_free_all_shares(vswp, B_TRUE);
913 	D1(vswp, "%s:exit\n", __func__);
914 	return (B_TRUE);
915 }
916 
917 /*
918  * vsw_hio_panic_callb -- Called from panic event. It resets all
919  *	the ports that have shares allocated. This is done to
920  *	trigger the cleanup in the guest ahead of HV reset.
921  */
922 /* ARGSUSED */
923 static boolean_t
924 vsw_hio_panic_callb(void *arg, int code)
925 {
926 	vsw_t *vswp = arg;
927 
928 	D1(vswp, "%s:enter\n", __func__);
929 	vsw_hio_reset_all(vswp);
930 	D1(vswp, "%s:exit\n", __func__);
931 	return (B_TRUE);
932 }
933 
934 /*
935  * Setup kstats for hio statistics.
936  */
937 static kstat_t *
938 vsw_hio_setup_kstats(char *ks_mod, char *ks_name, vsw_t *vswp)
939 {
940 	kstat_t			*ksp;
941 	vsw_hio_kstats_t	*hiokp;
942 	vsw_hio_t		*hiop;
943 	char			share_assigned_info[MAXNAMELEN];
944 	size_t			size;
945 	int			i;
946 
947 	hiop = &vswp->vhio;
948 	/*
949 	 * vsw_hio_stats_t structure is variable size structure
950 	 * having fields defined only for one share. So, we need
951 	 * allocate additional space for the rest of the shares.
952 	 */
953 	size = sizeof (vsw_hio_kstats_t) / sizeof (kstat_named_t);
954 	ASSERT(hiop->vh_num_shares >= 1);
955 	size += ((hiop->vh_num_shares - 1) * 2);
956 
957 	ksp = kstat_create(ks_mod, vswp->instance, ks_name, "misc",
958 	    KSTAT_TYPE_NAMED, size, KSTAT_FLAG_VIRTUAL);
959 
960 	if (ksp == NULL) {
961 		return (NULL);
962 	}
963 	hiokp = (vsw_hio_kstats_t *)kmem_zalloc(sizeof (kstat_named_t) *
964 	    size, KM_SLEEP);
965 	ksp->ks_data = hiokp;
966 
967 	hiop->vh_ksp = ksp;
968 	hiop->vh_kstatsp = hiokp;
969 	hiop->vh_kstat_size =  size;
970 
971 	kstat_named_init(&hiokp->hio_capable, "hio_capable", KSTAT_DATA_CHAR);
972 	kstat_named_init(&hiokp->hio_num_shares, "hio_num_shares",
973 	    KSTAT_DATA_ULONG);
974 
975 	for (i = 0; i < hiop->vh_num_shares; i++) {
976 		(void) sprintf(share_assigned_info, "%s%d", "hio_share_", i);
977 		kstat_named_init(&(hiokp->share[i].assigned),
978 		    share_assigned_info, KSTAT_DATA_ULONG);
979 
980 		(void) sprintf(share_assigned_info, "%s%d%s",
981 		    "hio_share_", i, "_state");
982 		kstat_named_init(&(hiokp->share[i].state),
983 		    share_assigned_info, KSTAT_DATA_ULONG);
984 	}
985 
986 	ksp->ks_update = vsw_hio_kstats_update;
987 	ksp->ks_private = (void *)vswp;
988 	kstat_install(ksp);
989 	return (ksp);
990 }
991 
992 /*
993  * Destroy hio kstats.
994  */
995 static void
996 vsw_hio_destroy_kstats(vsw_t *vswp)
997 {
998 	kstat_t			*ksp;
999 	vsw_hio_t		*hiop;
1000 
1001 	ASSERT(vswp != NULL);
1002 
1003 	ksp = vswp->vhio.vh_ksp;
1004 	hiop = &vswp->vhio;
1005 	if (ksp != NULL) {
1006 		kmem_free(hiop->vh_kstatsp, sizeof (kstat_named_t) *
1007 		    hiop->vh_kstat_size);
1008 		kstat_delete(ksp);
1009 		hiop->vh_kstatsp = NULL;
1010 		hiop->vh_ksp = NULL;
1011 	}
1012 }
1013 
1014 /*
1015  * Update hio kstats.
1016  */
1017 static int
1018 vsw_hio_kstats_update(kstat_t *ksp, int rw)
1019 {
1020 	vsw_t			*vswp;
1021 	vsw_hio_t		*hiop;
1022 	vsw_hio_kstats_t	*hiokp;
1023 	int			i;
1024 
1025 	vswp = (vsw_t *)ksp->ks_private;
1026 	ASSERT(vswp != NULL);
1027 
1028 	hiop = &vswp->vhio;
1029 	hiokp = hiop->vh_kstatsp;
1030 
1031 	if (rw == KSTAT_READ) {
1032 		if (vswp->hio_capable) {
1033 			(void) strcpy(hiokp->hio_capable.value.c, "Yes");
1034 		} else {
1035 			/* not hio capable, just return */
1036 			(void) strcpy(hiokp->hio_capable.value.c, "No");
1037 			return (0);
1038 		}
1039 
1040 		mutex_enter(&vswp->mac_lock);
1041 		hiokp->hio_num_shares.value.ul = (uint32_t)hiop->vh_num_shares;
1042 		for (i = 0; i < hiop->vh_num_shares; i++) {
1043 			hiokp->share[i].assigned.value.ul =
1044 			    hiop->vh_shares[i].vs_macaddr;
1045 			hiokp->share[i].state.value.ul =
1046 			    hiop->vh_shares[i].vs_state;
1047 		}
1048 		mutex_exit(&vswp->mac_lock);
1049 	} else {
1050 		return (EACCES);
1051 	}
1052 
1053 	return (0);
1054 }
1055