xref: /illumos-gate/usr/src/uts/sun4v/io/vsw_hio.c (revision b4128092752f04132443f3dd6bc22b84cf15cf33)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #include <sys/types.h>
28 #include <sys/errno.h>
29 #include <sys/debug.h>
30 #include <sys/time.h>
31 #include <sys/sysmacros.h>
32 #include <sys/systm.h>
33 #include <sys/user.h>
34 #include <sys/stropts.h>
35 #include <sys/stream.h>
36 #include <sys/strlog.h>
37 #include <sys/strsubr.h>
38 #include <sys/cmn_err.h>
39 #include <sys/cpu.h>
40 #include <sys/kmem.h>
41 #include <sys/conf.h>
42 #include <sys/ddi.h>
43 #include <sys/sunddi.h>
44 #include <sys/ksynch.h>
45 #include <sys/stat.h>
46 #include <sys/kstat.h>
47 #include <sys/vtrace.h>
48 #include <sys/strsun.h>
49 #include <sys/dlpi.h>
50 #include <sys/ethernet.h>
51 #include <net/if.h>
52 #include <sys/varargs.h>
53 #include <sys/machsystm.h>
54 #include <sys/modctl.h>
55 #include <sys/modhash.h>
56 #include <sys/mac.h>
57 #include <sys/mac_ether.h>
58 #include <sys/taskq.h>
59 #include <sys/note.h>
60 #include <sys/mach_descrip.h>
61 #include <sys/mac.h>
62 #include <sys/mdeg.h>
63 #include <sys/ldc.h>
64 #include <sys/vsw_fdb.h>
65 #include <sys/vsw.h>
66 #include <sys/vio_mailbox.h>
67 #include <sys/vnet_mailbox.h>
68 #include <sys/vnet_common.h>
69 #include <sys/vio_util.h>
70 #include <sys/sdt.h>
71 #include <sys/atomic.h>
72 #include <sys/callb.h>
73 
74 
/*
 * Advance the share's DDS request id and yield the new value.
 * The argument is parenthesized so that any pointer expression
 * (e.g. "base + i") expands correctly.
 */
#define	VSW_DDS_NEXT_REQID(vsharep)	(++(vsharep)->vs_req_id)
76 
77 extern boolean_t vsw_hio_enabled;		/* HybridIO enabled? */
78 extern int vsw_hio_max_cleanup_retries;
79 extern int vsw_hio_cleanup_delay;
80 
81 /* Functions imported from other files */
82 extern int vsw_send_msg(vsw_ldc_t *, void *, int, boolean_t);
83 extern int vsw_set_hw(vsw_t *, vsw_port_t *, int);
84 extern int vsw_unset_hw(vsw_t *, vsw_port_t *, int);
85 extern void vsw_hio_port_reset(vsw_port_t *portp, boolean_t immediate);
86 
87 /* Functions exported to other files */
88 void vsw_hio_init(vsw_t *vswp);
89 void vsw_hio_cleanup(vsw_t *vswp);
90 void vsw_hio_start(vsw_t *vswp, vsw_ldc_t *ldcp);
91 void vsw_hio_stop(vsw_t *vswp, vsw_ldc_t *ldcp);
92 void vsw_process_dds_msg(vsw_t *vswp, vsw_ldc_t *ldcp, void *msg);
93 void vsw_hio_start_ports(vsw_t *vswp);
94 void vsw_hio_stop_port(vsw_port_t *portp);
95 
96 /* Support functions */
97 static void vsw_hio_free_all_shares(vsw_t *vswp, boolean_t reboot);
98 static vsw_share_t *vsw_hio_alloc_share(vsw_t *vswp, vsw_ldc_t *ldcp);
99 static void vsw_hio_free_share(vsw_share_t *vsharep);
100 static vsw_share_t *vsw_hio_find_free_share(vsw_t *vswp);
101 static vsw_share_t *vsw_hio_find_vshare_ldcid(vsw_t *vswp, uint64_t ldc_id);
102 static vsw_share_t *vsw_hio_find_vshare_port(vsw_t *vswp, vsw_port_t *portp);
103 static int vsw_send_dds_msg(vsw_ldc_t *ldcp, uint8_t dds_subclass,
104     uint64_t cookie, uint64_t macaddr, uint32_t req_id);
105 static int vsw_send_dds_resp_msg(vsw_ldc_t *ldcp, vio_dds_msg_t *dmsg, int ack);
106 static int vsw_hio_send_delshare_msg(vsw_share_t *vsharep);
107 static int vsw_hio_bind_macaddr(vsw_share_t *vsharep);
108 static void vsw_hio_unbind_macaddr(vsw_share_t *vsharep);
109 static boolean_t vsw_hio_reboot_callb(void *arg, int code);
110 static boolean_t vsw_hio_panic_callb(void *arg, int code);
111 
112 static kstat_t *vsw_hio_setup_kstats(char *ks_mod, char *ks_name, vsw_t *vswp);
113 static void vsw_hio_destroy_kstats(vsw_t *vswp);
114 static int vsw_hio_kstats_update(kstat_t *ksp, int rw);
115 
116 /*
117  * vsw_hio_init -- Initialize the HybridIO related info.
118  *	- Query SHARES and RINGS capability. Both capabilities
119  *	  need to be supported by the physical-device.
120  */
121 void
122 vsw_hio_init(vsw_t *vswp)
123 {
124 	vsw_hio_t	*hiop = &vswp->vhio;
125 	int		i;
126 	int		rv;
127 
128 	D1(vswp, "%s:enter\n", __func__);
129 	mutex_enter(&vswp->hw_lock);
130 	if (vsw_hio_enabled == B_FALSE) {
131 		mutex_exit(&vswp->hw_lock);
132 		return;
133 	}
134 
135 	vswp->hio_capable = B_FALSE;
136 	rv = mac_capab_get(vswp->mh, MAC_CAPAB_SHARES, &hiop->vh_scapab);
137 	if (rv == B_FALSE) {
138 		D2(vswp, "%s: %s is not HybridIO capable\n", __func__,
139 		    vswp->physname);
140 		mutex_exit(&vswp->hw_lock);
141 		return;
142 	}
143 	rv = mac_capab_get(vswp->mh, MAC_CAPAB_RINGS, &hiop->vh_rcapab);
144 	if (rv == B_FALSE) {
145 		DWARN(vswp, "%s: %s has no RINGS capability\n", __func__,
146 		    vswp->physname);
147 		mutex_exit(&vswp->hw_lock);
148 		return;
149 	}
150 	hiop->vh_num_shares = hiop->vh_scapab.ms_snum;
151 	hiop->vh_shares = kmem_zalloc((sizeof (vsw_share_t) *
152 	    hiop->vh_num_shares), KM_SLEEP);
153 	for (i = 0; i < hiop->vh_num_shares; i++) {
154 		hiop->vh_shares[i].vs_state = VSW_SHARE_FREE;
155 		hiop->vh_shares[i].vs_index = i;
156 		hiop->vh_shares[i].vs_vswp = vswp;
157 	}
158 	vswp->hio_capable = B_TRUE;
159 
160 	/*
161 	 * Register to get reboot and panic events so that
162 	 * we can cleanup HybridIO resources gracefully.
163 	 */
164 	vswp->hio_reboot_cb_id = callb_add(vsw_hio_reboot_callb,
165 	    (void *)vswp, CB_CL_MDBOOT, "vsw_hio");
166 
167 	vswp->hio_panic_cb_id = callb_add(vsw_hio_panic_callb,
168 	    (void *)vswp, CB_CL_PANIC, "vsw_hio");
169 
170 	/* setup kstats for hybrid resources */
171 	hiop->vh_ksp = vsw_hio_setup_kstats(DRV_NAME, "hio", vswp);
172 	if (hiop->vh_ksp == NULL) {
173 		DERR(vswp, "%s: kstats setup failed", __func__);
174 	}
175 
176 	D2(vswp, "%s: %s is HybridIO capable num_shares=%d\n", __func__,
177 	    vswp->physname, hiop->vh_num_shares);
178 	D1(vswp, "%s:exit\n", __func__);
179 	mutex_exit(&vswp->hw_lock);
180 }
181 
182 /*
183  * vsw_hio_alloc_share -- Allocate and setup the share for a guest domain.
184  *	- Allocate a free share.
185  *	- Bind the Guest's MAC address.
186  */
187 static vsw_share_t *
188 vsw_hio_alloc_share(vsw_t *vswp, vsw_ldc_t *ldcp)
189 {
190 	vsw_hio_t	*hiop = &vswp->vhio;
191 	mac_capab_share_t *hcapab = &hiop->vh_scapab;
192 	vsw_share_t	*vsharep;
193 	vsw_port_t	*portp = ldcp->ldc_port;
194 	uint64_t	ldc_id = ldcp->ldc_id;
195 	uint32_t	rmin, rmax;
196 	uint64_t	rmap;
197 	int		rv;
198 
199 	D1(vswp, "%s:enter\n", __func__);
200 	vsharep = vsw_hio_find_free_share(vswp);
201 	if (vsharep == NULL) {
202 		/* No free shares available */
203 		return (NULL);
204 	}
205 	/*
206 	 * Allocate a Share - it will come with rings/groups
207 	 * already assigned to it.
208 	 */
209 	rv = hcapab->ms_salloc(hcapab->ms_handle, ldc_id,
210 	    &vsharep->vs_cookie, &vsharep->vs_shdl);
211 	if (rv != 0) {
212 		D2(vswp, "Alloc a share failed for ldc=0x%lx rv=%d",
213 		    ldc_id, rv);
214 		return (NULL);
215 	}
216 
217 	/*
218 	 * Query the RX group number to bind the port's
219 	 * MAC address to it.
220 	 */
221 	hcapab->ms_squery(vsharep->vs_shdl, MAC_RING_TYPE_RX,
222 	    &rmin, &rmax, &rmap, &vsharep->vs_gnum);
223 
224 	/* Cache some useful info */
225 	vsharep->vs_ldcid = ldcp->ldc_id;
226 	vsharep->vs_macaddr = vnet_macaddr_strtoul(
227 	    portp->p_macaddr.ether_addr_octet);
228 	vsharep->vs_portp = ldcp->ldc_port;
229 
230 	/* Bind the Guest's MAC address */
231 	rv = vsw_hio_bind_macaddr(vsharep);
232 	if (rv != 0) {
233 		/* something went wrong, cleanup */
234 		hcapab->ms_sfree(vsharep->vs_shdl);
235 		return (NULL);
236 	}
237 
238 	vsharep->vs_state |= VSW_SHARE_ASSIGNED;
239 
240 	D1(vswp, "%s:exit\n", __func__);
241 	return (vsharep);
242 }
243 
244 /*
245  * vsw_hio_bind_macaddr -- Remove the port's MAC address from the
246  *	physdev and bind it to the Share's RX group.
247  */
248 static int
249 vsw_hio_bind_macaddr(vsw_share_t *vsharep)
250 {
251 	vsw_t		*vswp = vsharep->vs_vswp;
252 	vsw_port_t	*portp = vsharep->vs_portp;
253 	mac_capab_rings_t *rcapab = &vswp->vhio.vh_rcapab;
254 	mac_group_info_t *ginfop = &vsharep->vs_rxginfo;
255 	int		rv;
256 
257 	/* Get the RX groupinfo */
258 	rcapab->mr_gget(rcapab->mr_handle, MAC_RING_TYPE_RX,
259 	    vsharep->vs_gnum, &vsharep->vs_rxginfo, NULL);
260 
261 	/* Unset the MAC address first */
262 	if (portp->addr_set != VSW_ADDR_UNSET) {
263 		(void) vsw_unset_hw(vswp, portp, VSW_VNETPORT);
264 	}
265 
266 	/* Bind the MAC address to the RX group */
267 	rv = ginfop->mrg_addmac(ginfop->mrg_driver,
268 	    (uint8_t *)&portp->p_macaddr.ether_addr_octet);
269 	if (rv != 0) {
270 		/* Restore the address back as it was */
271 		(void) vsw_set_hw(vswp, portp, VSW_VNETPORT);
272 		return (rv);
273 	}
274 	return (0);
275 }
276 
277 /*
278  * vsw_hio_unbind_macaddr -- Unbind the port's MAC address and restore
279  *	it back as it was before.
280  */
281 static void
282 vsw_hio_unbind_macaddr(vsw_share_t *vsharep)
283 {
284 	vsw_t		*vswp = vsharep->vs_vswp;
285 	vsw_port_t	*portp = vsharep->vs_portp;
286 	mac_group_info_t *ginfop = &vsharep->vs_rxginfo;
287 
288 	if (portp == NULL) {
289 		return;
290 	}
291 	/* Unbind the MAC address from the RX group */
292 	(void) ginfop->mrg_remmac(ginfop->mrg_driver,
293 	    (uint8_t *)&portp->p_macaddr.ether_addr_octet);
294 
295 	/* Program the MAC address back */
296 	(void) vsw_set_hw(vswp, portp, VSW_VNETPORT);
297 }
298 
299 /*
300  * vsw_hio_find_free_share -- Find a free Share.
301  */
302 static vsw_share_t *
303 vsw_hio_find_free_share(vsw_t *vswp)
304 {
305 	vsw_hio_t *hiop = &vswp->vhio;
306 	vsw_share_t *vsharep;
307 	int i;
308 
309 	D1(vswp, "%s:enter\n", __func__);
310 	for (i = 0; i < hiop->vh_num_shares; i++) {
311 		vsharep = &hiop->vh_shares[i];
312 		if (vsharep->vs_state == VSW_SHARE_FREE) {
313 			D1(vswp, "%s:Returning free share(%d)\n",
314 			    __func__, vsharep->vs_index);
315 			return (vsharep);
316 		}
317 	}
318 	D1(vswp, "%s:no free share\n", __func__);
319 	return (NULL);
320 }
321 
322 /*
323  * vsw_hio_find_vshare_ldcid -- Given ldc_id, find the corresponding
324  *	share structure.
325  */
326 static vsw_share_t *
327 vsw_hio_find_vshare_ldcid(vsw_t *vswp, uint64_t ldc_id)
328 {
329 	vsw_hio_t *hiop = &vswp->vhio;
330 	vsw_share_t *vsharep;
331 	int i;
332 
333 	D1(vswp, "%s:enter, ldc=0x%lx", __func__, ldc_id);
334 	for (i = 0; i < hiop->vh_num_shares; i++) {
335 		vsharep = &hiop->vh_shares[i];
336 		if (vsharep->vs_state == VSW_SHARE_FREE) {
337 			continue;
338 		}
339 		if (vsharep->vs_ldcid == ldc_id) {
340 			D1(vswp, "%s:returning share(%d)",
341 			    __func__, vsharep->vs_index);
342 			return (vsharep);
343 		}
344 	}
345 	D1(vswp, "%s:returning NULL", __func__);
346 	return (NULL);
347 }
348 
349 /*
350  * vsw_hio_find_vshare_port -- Given portp, find the corresponding
351  *	share structure.
352  */
353 static vsw_share_t *
354 vsw_hio_find_vshare_port(vsw_t *vswp, vsw_port_t *portp)
355 {
356 	vsw_hio_t *hiop = &vswp->vhio;
357 	vsw_share_t *vsharep;
358 	int i;
359 
360 	D1(vswp, "%s:enter, portp=0x%p", __func__, portp);
361 	for (i = 0; i < hiop->vh_num_shares; i++) {
362 		vsharep = &hiop->vh_shares[i];
363 		if (vsharep->vs_state == VSW_SHARE_FREE) {
364 			continue;
365 		}
366 		if (vsharep->vs_portp == portp) {
367 			D1(vswp, "%s:returning share(%d)",
368 			    __func__, vsharep->vs_index);
369 			return (vsharep);
370 		}
371 	}
372 	D1(vswp, "%s:returning NULL", __func__);
373 	return (NULL);
374 }
375 
376 /*
377  * vsw_hio_free_share -- Unbind the MAC address and free share.
378  */
379 static void
380 vsw_hio_free_share(vsw_share_t *vsharep)
381 {
382 	vsw_t		*vswp = vsharep->vs_vswp;
383 	vsw_hio_t	*hiop = &vswp->vhio;
384 	mac_capab_share_t *hcapab = &hiop->vh_scapab;
385 
386 	D1(vswp, "%s:enter\n", __func__);
387 
388 	/* First unbind the MAC address and restore it back */
389 	vsw_hio_unbind_macaddr(vsharep);
390 
391 	/* free share */
392 	hcapab->ms_sfree(vsharep->vs_shdl);
393 	vsharep->vs_state = VSW_SHARE_FREE;
394 	vsharep->vs_macaddr = 0;
395 
396 	/* DERR only for printing by default */
397 	DERR(vswp, "Share freed for ldc_id=0x%lx Cookie=0x%lX",
398 	    vsharep->vs_ldcid, vsharep->vs_cookie);
399 	D1(vswp, "%s:exit\n", __func__);
400 }
401 
402 
403 /*
404  * vsw_hio_cleanup -- Cleanup the HybridIO. It unregisters the callbs
405  *	and frees all shares.
406  */
407 void
408 vsw_hio_cleanup(vsw_t *vswp)
409 {
410 	D1(vswp, "%s:enter\n", __func__);
411 
412 	/* Unregister reboot and panic callbs. */
413 	if (vswp->hio_reboot_cb_id) {
414 		(void) callb_delete(vswp->hio_reboot_cb_id);
415 		vswp->hio_reboot_cb_id = 0;
416 	}
417 	if (vswp->hio_panic_cb_id) {
418 		(void) callb_delete(vswp->hio_panic_cb_id);
419 		vswp->hio_panic_cb_id = 0;
420 	}
421 	vsw_hio_free_all_shares(vswp, B_FALSE);
422 	vsw_hio_destroy_kstats(vswp);
423 	D1(vswp, "%s:exit\n", __func__);
424 }
425 
426 /*
427  * vsw_hio_free_all_shares -- A routine to free all shares gracefully.
428  *	The following are the steps followed to accomplish this:
429  *
430  *	- First clear 'hio_capable' to avoid further share allocations.
431  *	- If a share is in accepted(ACKD) state, that means the guest
432  *	  has HybridIO setup etc. If so, send a DEL_SHARE message and
433  *	  give some time(delay) for the guest to ACK.
434  *	- If the Share is another state, give some time to transition to
435  *	  ACKD state, then try the above.
436  *	- After max retries, reset the ports to brute force the shares
437  *	  to be freed. Give a little delay for the LDC reset code to
438  *	  free the Share.
439  */
440 static void
441 vsw_hio_free_all_shares(vsw_t *vswp, boolean_t reboot)
442 {
443 	vsw_hio_t	*hiop = &vswp->vhio;
444 	vsw_port_list_t	*plist = &vswp->plist;
445 	vsw_share_t	*vsharep;
446 	int		free_shares = 0;
447 	int		max_retries = vsw_hio_max_cleanup_retries;
448 	int		i;
449 
450 	D1(vswp, "%s:enter\n", __func__);
451 
452 	/*
453 	 * Acquire plist->lockrw to make the locking a bit easier
454 	 * and keep the ports in a stable state while we are cleaningup
455 	 * HybridIO.
456 	 */
457 	READ_ENTER(&plist->lockrw);
458 	mutex_enter(&vswp->hw_lock);
459 	/*
460 	 * first clear the hio_capable flag so that no more
461 	 * HybridIO operations are initiated.
462 	 */
463 	vswp->hio_capable = B_FALSE;
464 
465 	do {
466 		free_shares = 0;
467 		for (i = 0; i < hiop->vh_num_shares; i++) {
468 			vsharep = &hiop->vh_shares[i];
469 			if (vsharep->vs_state == VSW_SHARE_FREE) {
470 				free_shares++;
471 				continue;
472 			}
473 			/*
474 			 * If the share is in DDS_ACKD state, then
475 			 * send DEL_SHARE message so that guest can
476 			 * release its Hybrid resource.
477 			 */
478 			if (vsharep->vs_state & VSW_SHARE_DDS_ACKD) {
479 				int rv;
480 
481 				/* send DDS_DEL_SHARE */
482 				D1(vswp, "%s:sending DEL_SHARE msg for "
483 				    "share(%d)", __func__, vsharep->vs_index);
484 				rv = vsw_hio_send_delshare_msg(vsharep);
485 				if (rv != 0) {
486 					/*
487 					 * No alternative, reset the port
488 					 * to force the release of Hybrid
489 					 * resources.
490 					 */
491 					vsw_hio_port_reset(vsharep->vs_portp,
492 					    B_FALSE);
493 				}
494 			}
495 			if (max_retries == 1) {
496 				/*
497 				 * Last retry,  reset the port.
498 				 * If it is reboot case, issue an immediate
499 				 * reset.
500 				 */
501 				DWARN(vswp, "%s:All retries failed, "
502 				    " cause a reset to trigger cleanup for "
503 				    "share(%d)", __func__, vsharep->vs_index);
504 				vsw_hio_port_reset(vsharep->vs_portp, reboot);
505 			}
506 		}
507 		if (free_shares == hiop->vh_num_shares) {
508 			/* Clean up is done */
509 			break;
510 		}
511 		/*
512 		 * Release the lock so that reply for DEL_SHARE
513 		 * messages come and get processed, that is, shares
514 		 * get freed.
515 		 * This delay is also needed for the port reset to
516 		 * release the Hybrid resource.
517 		 */
518 		mutex_exit(&vswp->hw_lock);
519 		drv_usecwait(vsw_hio_cleanup_delay);
520 		mutex_enter(&vswp->hw_lock);
521 		max_retries--;
522 	} while ((free_shares < hiop->vh_num_shares) && (max_retries > 0));
523 
524 	/* By now, all shares should be freed */
525 	if (free_shares != hiop->vh_num_shares) {
526 		if (reboot == B_FALSE) {
527 			cmn_err(CE_NOTE, "vsw%d: All physical resources "
528 			    "could not be freed", vswp->instance);
529 		}
530 	}
531 
532 	kmem_free(hiop->vh_shares, sizeof (vsw_share_t) * hiop->vh_num_shares);
533 	hiop->vh_shares = NULL;
534 	hiop->vh_num_shares = 0;
535 	mutex_exit(&vswp->hw_lock);
536 	RW_EXIT(&plist->lockrw);
537 	D1(vswp, "%s:exit\n", __func__);
538 }
539 
540 /*
541  * vsw_hio_start_ports -- Start HybridIO for ports that have
542  *	already established connection before HybridIO is intialized.
543  */
544 void
545 vsw_hio_start_ports(vsw_t *vswp)
546 {
547 	vsw_port_list_t	*plist = &vswp->plist;
548 	vsw_port_t	*portp;
549 	vsw_share_t	*vsharep;
550 	boolean_t	reset;
551 
552 	if (vswp->hio_capable == B_FALSE) {
553 		return;
554 	}
555 	READ_ENTER(&plist->lockrw);
556 	for (portp = plist->head; portp != NULL; portp = portp->p_next) {
557 		if ((portp->p_hio_enabled == B_FALSE) ||
558 		    (portp->p_hio_capable == B_FALSE)) {
559 			continue;
560 		}
561 
562 		reset = B_FALSE;
563 		mutex_enter(&vswp->hw_lock);
564 		vsharep = vsw_hio_find_vshare_port(vswp, portp);
565 		if (vsharep == NULL) {
566 			reset = B_TRUE;
567 		}
568 		mutex_exit(&vswp->hw_lock);
569 
570 		if (reset == B_TRUE) {
571 			/* Cause a rest to trigger HybridIO setup */
572 			vsw_hio_port_reset(portp, B_FALSE);
573 		}
574 	}
575 	RW_EXIT(&plist->lockrw);
576 }
577 
578 /*
579  * vsw_hio_start -- Start HybridIO for a guest(given LDC)
580  */
581 void
582 vsw_hio_start(vsw_t *vswp, vsw_ldc_t *ldcp)
583 {
584 	vsw_share_t	*vsharep;
585 	uint32_t	req_id;
586 	int		rv;
587 
588 	D1(vswp, "%s:enter ldc=0x%lx", __func__, ldcp->ldc_id);
589 	mutex_enter(&vswp->hw_lock);
590 	if (vswp->hio_capable == B_FALSE) {
591 		mutex_exit(&vswp->hw_lock);
592 		D2(vswp, "%s:not HIO capable", __func__);
593 		return;
594 	}
595 
596 	/* Verify if a share was already allocated */
597 	vsharep = vsw_hio_find_vshare_ldcid(vswp, ldcp->ldc_id);
598 	if (vsharep != NULL) {
599 		mutex_exit(&vswp->hw_lock);
600 		D2(vswp, "%s:Share already allocated to ldc=0x%lx",
601 		    __func__, ldcp->ldc_id);
602 		return;
603 	}
604 	vsharep = vsw_hio_alloc_share(vswp, ldcp);
605 	if (vsharep == NULL) {
606 		mutex_exit(&vswp->hw_lock);
607 		D2(vswp, "%s: no Share available for ldc=0x%lx",
608 		    __func__, ldcp->ldc_id);
609 		return;
610 	}
611 	req_id = VSW_DDS_NEXT_REQID(vsharep);
612 	rv = vsw_send_dds_msg(ldcp, DDS_VNET_ADD_SHARE, vsharep->vs_cookie,
613 	    vsharep->vs_macaddr, req_id);
614 	if (rv != 0) {
615 		/*
616 		 * Failed to send a DDS message, so cleanup now.
617 		 */
618 		vsw_hio_free_share(vsharep);
619 		mutex_exit(&vswp->hw_lock);
620 		return;
621 	}
622 	vsharep->vs_state &= ~VSW_SHARE_DDS_ACKD;
623 	vsharep->vs_state |= VSW_SHARE_DDS_SENT;
624 	mutex_exit(&vswp->hw_lock);
625 
626 	/* DERR only to print by default */
627 	DERR(vswp, "Share allocated for ldc_id=0x%lx Cookie=0x%lX",
628 	    ldcp->ldc_id, vsharep->vs_cookie);
629 
630 	D1(vswp, "%s:exit ldc=0x%lx", __func__, ldcp->ldc_id);
631 }
632 
633 /*
634  * vsw_hio_stop -- Stop/clean the HybridIO config for a guest(given ldc).
635  */
636 void
637 vsw_hio_stop(vsw_t *vswp, vsw_ldc_t *ldcp)
638 {
639 	vsw_share_t *vsharep;
640 
641 	D1(vswp, "%s:enter ldc=0x%lx", __func__, ldcp->ldc_id);
642 
643 	mutex_enter(&vswp->hw_lock);
644 	vsharep = vsw_hio_find_vshare_ldcid(vswp, ldcp->ldc_id);
645 	if (vsharep == NULL) {
646 		D1(vswp, "%s:no share found for ldc=0x%lx",
647 		    __func__, ldcp->ldc_id);
648 		mutex_exit(&vswp->hw_lock);
649 		return;
650 	}
651 	vsw_hio_free_share(vsharep);
652 	mutex_exit(&vswp->hw_lock);
653 
654 	D1(vswp, "%s:exit ldc=0x%lx", __func__, ldcp->ldc_id);
655 }
656 
657 /*
658  * vsw_hio_send_delshare_msg -- Send a DEL_SHARE message to the	guest.
659  */
660 static int
661 vsw_hio_send_delshare_msg(vsw_share_t *vsharep)
662 {
663 	vsw_t *vswp = vsharep->vs_vswp;
664 	vsw_port_t *portp;
665 	vsw_ldc_list_t	*ldcl;
666 	vsw_ldc_t	*ldcp;
667 	uint32_t	req_id;
668 	uint64_t	cookie = vsharep->vs_cookie;
669 	uint64_t	macaddr = vsharep->vs_macaddr;
670 	int		rv;
671 
672 	ASSERT(MUTEX_HELD(&vswp->hw_lock));
673 	mutex_exit(&vswp->hw_lock);
674 
675 	portp = vsharep->vs_portp;
676 	if (portp == NULL) {
677 		mutex_enter(&vswp->hw_lock);
678 		return (0);
679 	}
680 
681 	ldcl = &portp->p_ldclist;
682 	READ_ENTER(&ldcl->lockrw);
683 	ldcp = ldcl->head;
684 	if ((ldcp == NULL) || (ldcp->ldc_id != vsharep->vs_ldcid)) {
685 		RW_EXIT(&ldcl->lockrw);
686 		mutex_enter(&vswp->hw_lock);
687 		return (0);
688 	}
689 	req_id = VSW_DDS_NEXT_REQID(vsharep);
690 	rv = vsw_send_dds_msg(ldcp, DDS_VNET_DEL_SHARE,
691 	    cookie, macaddr, req_id);
692 
693 	RW_EXIT(&ldcl->lockrw);
694 	mutex_enter(&vswp->hw_lock);
695 	if (rv == 0) {
696 		vsharep->vs_state &= ~VSW_SHARE_DDS_ACKD;
697 		vsharep->vs_state |= VSW_SHARE_DDS_SENT;
698 	}
699 	return (rv);
700 }
701 
702 /*
703  * vsw_send_dds_msg -- Send a DDS message.
704  */
705 static int
706 vsw_send_dds_msg(vsw_ldc_t *ldcp, uint8_t dds_subclass, uint64_t
707     cookie, uint64_t macaddr, uint32_t req_id)
708 {
709 	vsw_t *vswp = ldcp->ldc_port->p_vswp;
710 	vio_dds_msg_t	vmsg;
711 	dds_share_msg_t	*smsg = &vmsg.msg.share_msg;
712 	int rv;
713 
714 	D1(vswp, "%s:enter\n", __func__);
715 	vmsg.tag.vio_msgtype = VIO_TYPE_CTRL;
716 	vmsg.tag.vio_subtype = VIO_SUBTYPE_INFO;
717 	vmsg.tag.vio_subtype_env = VIO_DDS_INFO;
718 	vmsg.tag.vio_sid = ldcp->local_session;
719 	vmsg.dds_class = DDS_VNET_NIU;
720 	vmsg.dds_subclass = dds_subclass;
721 	vmsg.dds_req_id = req_id;
722 	smsg->macaddr = macaddr;
723 	smsg->cookie = cookie;
724 	rv = vsw_send_msg(ldcp, &vmsg, sizeof (vmsg), B_FALSE);
725 	D1(vswp, "%s:exit rv=%d\n", __func__, rv);
726 	return (rv);
727 }
728 
729 /*
730  * vsw_process_dds_msg -- Process a DDS message received from a guest.
731  */
732 void
733 vsw_process_dds_msg(vsw_t *vswp, vsw_ldc_t *ldcp, void *msg)
734 {
735 	vsw_share_t	*vsharep;
736 	vio_dds_msg_t	*dmsg = msg;
737 
738 	D1(vswp, "%s:enter ldc=0x%lx\n", __func__, ldcp->ldc_id);
739 	if (dmsg->dds_class != DDS_VNET_NIU) {
740 		/* discard */
741 		return;
742 	}
743 	mutex_enter(&vswp->hw_lock);
744 	/*
745 	 * We expect to receive DDS messages only from guests that
746 	 * have HybridIO started.
747 	 */
748 	vsharep = vsw_hio_find_vshare_ldcid(vswp, ldcp->ldc_id);
749 	if (vsharep == NULL) {
750 		mutex_exit(&vswp->hw_lock);
751 		return;
752 	}
753 
754 	switch (dmsg->dds_subclass) {
755 	case DDS_VNET_ADD_SHARE:
756 		/* A response for ADD_SHARE message. */
757 		D1(vswp, "%s:DDS_VNET_ADD_SHARE\n", __func__);
758 		if (!(vsharep->vs_state & VSW_SHARE_DDS_SENT)) {
759 			DWARN(vswp, "%s: invalid ADD_SHARE response  message "
760 			    " share state=0x%X", __func__, vsharep->vs_state);
761 			break;
762 		}
763 
764 		if (dmsg->dds_req_id != vsharep->vs_req_id) {
765 			DWARN(vswp, "%s: invalid req_id in ADD_SHARE response"
766 			    " message req_id=0x%X share's req_id=0x%X",
767 			    __func__, dmsg->dds_req_id, vsharep->vs_req_id);
768 			break;
769 		}
770 
771 		if (dmsg->tag.vio_subtype == VIO_SUBTYPE_NACK) {
772 			DWARN(vswp, "%s: NACK received for ADD_SHARE"
773 			    " message ldcid=0x%lx", __func__, ldcp->ldc_id);
774 			/* cleanup for NACK */
775 			vsw_hio_free_share(vsharep);
776 		} else {
777 			D2(vswp, "%s: ACK received for ADD_SHARE", __func__);
778 			vsharep->vs_state &= ~VSW_SHARE_DDS_SENT;
779 			vsharep->vs_state |= VSW_SHARE_DDS_ACKD;
780 		}
781 		break;
782 
783 	case DDS_VNET_DEL_SHARE:
784 		/* A response for DEL_SHARE message */
785 		D1(vswp, "%s:DDS_VNET_DEL_SHARE\n", __func__);
786 		if (!(vsharep->vs_state & VSW_SHARE_DDS_SENT)) {
787 			DWARN(vswp, "%s: invalid DEL_SHARE response message "
788 			    " share state=0x%X", __func__, vsharep->vs_state);
789 			break;
790 		}
791 
792 		if (dmsg->dds_req_id != vsharep->vs_req_id) {
793 			DWARN(vswp, "%s: invalid req_id in DEL_SHARE response"
794 			    " message share req_id=0x%X share's req_id=0x%X",
795 			    __func__, dmsg->dds_req_id, vsharep->vs_req_id);
796 			break;
797 		}
798 		if (dmsg->tag.vio_subtype == VIO_SUBTYPE_NACK) {
799 			DWARN(vswp, "%s: NACK received for DEL_SHARE",
800 			    __func__);
801 		}
802 
803 		/* There is nothing we can do, free share now */
804 		vsw_hio_free_share(vsharep);
805 		break;
806 
807 	case DDS_VNET_REL_SHARE:
808 		/* Guest has released Share voluntarily, so free it now */
809 		D1(vswp, "%s:DDS_VNET_REL_SHARE\n", __func__);
810 		/* send ACK */
811 		(void) vsw_send_dds_resp_msg(ldcp, dmsg, B_FALSE);
812 		vsw_hio_free_share(vsharep);
813 		break;
814 	default:
815 		DERR(vswp, "%s: Invalid DDS message type=0x%X",
816 		    __func__, dmsg->dds_subclass);
817 		break;
818 	}
819 	mutex_exit(&vswp->hw_lock);
820 	D1(vswp, "%s:exit ldc=0x%lx\n", __func__, ldcp->ldc_id);
821 }
822 
823 /*
824  * vsw_send_dds_resp_msg -- Send a DDS response message.
825  */
826 static int
827 vsw_send_dds_resp_msg(vsw_ldc_t *ldcp, vio_dds_msg_t *dmsg, int ack)
828 {
829 	vsw_t	*vswp = ldcp->ldc_port->p_vswp;
830 	int	rv;
831 
832 	D1(vswp, "%s:enter\n", __func__);
833 	if (ack == B_TRUE) {
834 		dmsg->tag.vio_subtype = VIO_SUBTYPE_ACK;
835 		dmsg->msg.share_resp_msg.status = DDS_VNET_SUCCESS;
836 	} else {
837 		dmsg->tag.vio_subtype = VIO_SUBTYPE_NACK;
838 		dmsg->msg.share_resp_msg.status = DDS_VNET_FAIL;
839 	}
840 	rv = vsw_send_msg(ldcp, dmsg, sizeof (vio_dds_msg_t), B_FALSE);
841 	D1(vswp, "%s:exit rv=%d\n", __func__, rv);
842 	return (rv);
843 }
844 
845 /*
846  * vsw_hio_port_update -- update Hybrid mode change for a port.
847  */
848 void
849 vsw_hio_port_update(vsw_port_t *portp, boolean_t hio_enabled)
850 {
851 	/* Verify if the mode really changed */
852 	if (portp->p_hio_enabled == hio_enabled) {
853 		return;
854 	}
855 
856 	if (hio_enabled == B_FALSE) {
857 		/* Hybrid Mode is disabled, so stop HybridIO */
858 		vsw_hio_stop_port(portp);
859 		portp->p_hio_enabled = B_FALSE;
860 	} else {
861 		portp->p_hio_enabled =  B_TRUE;
862 		/* reset the port to initiate HybridIO setup */
863 		vsw_hio_port_reset(portp, B_FALSE);
864 	}
865 }
866 
867 /*
868  * vsw_hio_stop_port -- Stop HybridIO for a given port. Sequence
869  *	followed is similar to vsw_hio_free_all_shares().
870  *
871  */
872 void
873 vsw_hio_stop_port(vsw_port_t *portp)
874 {
875 	vsw_t *vswp = portp->p_vswp;
876 	vsw_share_t *vsharep;
877 	int max_retries = vsw_hio_max_cleanup_retries;
878 
879 	D1(vswp, "%s:enter\n", __func__);
880 	mutex_enter(&vswp->hw_lock);
881 
882 	if (vswp->hio_capable == B_FALSE) {
883 		mutex_exit(&vswp->hw_lock);
884 		return;
885 	}
886 
887 	vsharep = vsw_hio_find_vshare_port(vswp, portp);
888 	if (vsharep == NULL) {
889 		mutex_exit(&vswp->hw_lock);
890 		return;
891 	}
892 
893 	do {
894 		if (vsharep->vs_state & VSW_SHARE_DDS_ACKD) {
895 			int rv;
896 
897 			/* send DDS_DEL_SHARE */
898 			D1(vswp, "%s:sending DEL_SHARE msg for "
899 			    "share(%d)", __func__, vsharep->vs_index);
900 			rv = vsw_hio_send_delshare_msg(vsharep);
901 			if (rv != 0) {
902 				/*
903 				 * Cause a port reset to trigger
904 				 * cleanup.
905 				 */
906 				vsw_hio_port_reset(vsharep->vs_portp, B_FALSE);
907 			}
908 		}
909 		if (max_retries == 1) {
910 			/* last retry */
911 			DWARN(vswp, "%s:All retries failed, "
912 			    " cause a reset to trigger cleanup for "
913 			    "share(%d)", __func__, vsharep->vs_index);
914 			vsw_hio_port_reset(vsharep->vs_portp, B_FALSE);
915 		}
916 
917 		/* Check if the share still assigned to this port */
918 		if ((vsharep->vs_portp != portp) ||
919 		    (vsharep->vs_state == VSW_SHARE_FREE)) {
920 			break;
921 		}
922 
923 		/*
924 		 * Release the lock so that reply for DEL_SHARE
925 		 * messages come and get processed, that is, shares
926 		 * get freed.
927 		 */
928 		mutex_exit(&vswp->hw_lock);
929 		drv_usecwait(vsw_hio_cleanup_delay);
930 		mutex_enter(&vswp->hw_lock);
931 
932 		/* Check if the share still assigned to this port */
933 		if ((vsharep->vs_portp != portp) ||
934 		    (vsharep->vs_state == VSW_SHARE_FREE)) {
935 			break;
936 		}
937 		max_retries--;
938 	} while ((vsharep->vs_state != VSW_SHARE_FREE) && (max_retries > 0));
939 
940 	mutex_exit(&vswp->hw_lock);
941 	D1(vswp, "%s:exit\n", __func__);
942 }
943 
944 /*
945  * vsw_hio_rest_all -- Resets all ports that have shares allocated.
946  *	It is called only in the panic code path, so the LDC channels
947  *	are reset immediately.
948  */
949 static void
950 vsw_hio_reset_all(vsw_t *vswp)
951 {
952 	vsw_hio_t	*hiop = &vswp->vhio;
953 	vsw_share_t	*vsharep;
954 	int		i;
955 
956 	D1(vswp, "%s:enter\n", __func__);
957 
958 	if (vswp->hio_capable != B_TRUE)
959 		return;
960 
961 	for (i = 0; i < hiop->vh_num_shares; i++) {
962 		vsharep = &hiop->vh_shares[i];
963 		if (vsharep->vs_state == VSW_SHARE_FREE) {
964 			continue;
965 		}
966 		/*
967 		 * Reset the port with immediate flag enabled,
968 		 * to cause LDC reset immediately.
969 		 */
970 		vsw_hio_port_reset(vsharep->vs_portp, B_TRUE);
971 	}
972 	D1(vswp, "%s:exit\n", __func__);
973 }
974 
975 /*
976  * vsw_hio_reboot_callb -- Called for reboot event. It tries to
977  *	free all currently allocated shares.
978  */
979 /* ARGSUSED */
980 static boolean_t
981 vsw_hio_reboot_callb(void *arg, int code)
982 {
983 	vsw_t *vswp = arg;
984 
985 	D1(vswp, "%s:enter\n", __func__);
986 	vsw_hio_free_all_shares(vswp, B_TRUE);
987 	D1(vswp, "%s:exit\n", __func__);
988 	return (B_TRUE);
989 }
990 
991 /*
992  * vsw_hio_panic_callb -- Called from panic event. It resets all
993  *	the ports that have shares allocated. This is done to
994  *	trigger the cleanup in the guest ahead of HV reset.
995  */
996 /* ARGSUSED */
997 static boolean_t
998 vsw_hio_panic_callb(void *arg, int code)
999 {
1000 	vsw_t *vswp = arg;
1001 
1002 	D1(vswp, "%s:enter\n", __func__);
1003 	vsw_hio_reset_all(vswp);
1004 	D1(vswp, "%s:exit\n", __func__);
1005 	return (B_TRUE);
1006 }
1007 
1008 /*
1009  * Setup kstats for hio statistics.
1010  */
1011 static kstat_t *
1012 vsw_hio_setup_kstats(char *ks_mod, char *ks_name, vsw_t *vswp)
1013 {
1014 	kstat_t			*ksp;
1015 	vsw_hio_kstats_t	*hiokp;
1016 	vsw_hio_t		*hiop;
1017 	char			share_assigned_info[MAXNAMELEN];
1018 	size_t			size;
1019 	int			i;
1020 
1021 	hiop = &vswp->vhio;
1022 	/*
1023 	 * vsw_hio_stats_t structure is variable size structure
1024 	 * having fields defined only for one share. So, we need
1025 	 * allocate additional space for the rest of the shares.
1026 	 */
1027 	size = sizeof (vsw_hio_kstats_t) / sizeof (kstat_named_t);
1028 	ASSERT(hiop->vh_num_shares >= 1);
1029 	size += ((hiop->vh_num_shares - 1) * 2);
1030 
1031 	ksp = kstat_create(ks_mod, vswp->instance, ks_name, "misc",
1032 	    KSTAT_TYPE_NAMED, size, KSTAT_FLAG_VIRTUAL);
1033 
1034 	if (ksp == NULL) {
1035 		return (NULL);
1036 	}
1037 	hiokp = (vsw_hio_kstats_t *)kmem_zalloc(sizeof (kstat_named_t) *
1038 	    size, KM_SLEEP);
1039 	ksp->ks_data = hiokp;
1040 
1041 	hiop->vh_ksp = ksp;
1042 	hiop->vh_kstatsp = hiokp;
1043 	hiop->vh_kstat_size =  size;
1044 
1045 	kstat_named_init(&hiokp->hio_capable, "hio_capable", KSTAT_DATA_CHAR);
1046 	kstat_named_init(&hiokp->hio_num_shares, "hio_num_shares",
1047 	    KSTAT_DATA_ULONG);
1048 
1049 	for (i = 0; i < hiop->vh_num_shares; i++) {
1050 		(void) sprintf(share_assigned_info, "%s%d", "hio_share_", i);
1051 		kstat_named_init(&(hiokp->share[i].assigned),
1052 		    share_assigned_info, KSTAT_DATA_ULONG);
1053 
1054 		(void) sprintf(share_assigned_info, "%s%d%s",
1055 		    "hio_share_", i, "_state");
1056 		kstat_named_init(&(hiokp->share[i].state),
1057 		    share_assigned_info, KSTAT_DATA_ULONG);
1058 	}
1059 
1060 	ksp->ks_update = vsw_hio_kstats_update;
1061 	ksp->ks_private = (void *)vswp;
1062 	kstat_install(ksp);
1063 	return (ksp);
1064 }
1065 
1066 /*
1067  * Destroy hio kstats.
1068  */
1069 static void
1070 vsw_hio_destroy_kstats(vsw_t *vswp)
1071 {
1072 	kstat_t			*ksp;
1073 	vsw_hio_t		*hiop;
1074 
1075 	ASSERT(vswp != NULL);
1076 
1077 	ksp = vswp->vhio.vh_ksp;
1078 	hiop = &vswp->vhio;
1079 	if (ksp != NULL) {
1080 		kmem_free(hiop->vh_kstatsp, sizeof (kstat_named_t) *
1081 		    hiop->vh_kstat_size);
1082 		kstat_delete(ksp);
1083 		hiop->vh_kstatsp = NULL;
1084 		hiop->vh_ksp = NULL;
1085 	}
1086 }
1087 
1088 /*
1089  * Update hio kstats.
1090  */
1091 static int
1092 vsw_hio_kstats_update(kstat_t *ksp, int rw)
1093 {
1094 	vsw_t			*vswp;
1095 	vsw_hio_t		*hiop;
1096 	vsw_hio_kstats_t	*hiokp;
1097 	int			i;
1098 
1099 	vswp = (vsw_t *)ksp->ks_private;
1100 	ASSERT(vswp != NULL);
1101 
1102 	hiop = &vswp->vhio;
1103 	hiokp = hiop->vh_kstatsp;
1104 
1105 	if (rw == KSTAT_READ) {
1106 		if (vswp->hio_capable) {
1107 			(void) strcpy(hiokp->hio_capable.value.c, "Yes");
1108 		} else {
1109 			/* not hio capable, just return */
1110 			(void) strcpy(hiokp->hio_capable.value.c, "No");
1111 			return (0);
1112 		}
1113 
1114 		mutex_enter(&vswp->hw_lock);
1115 		hiokp->hio_num_shares.value.ul = (uint32_t)hiop->vh_num_shares;
1116 		for (i = 0; i < hiop->vh_num_shares; i++) {
1117 			hiokp->share[i].assigned.value.ul =
1118 			    hiop->vh_shares[i].vs_macaddr;
1119 			hiokp->share[i].state.value.ul =
1120 			    hiop->vh_shares[i].vs_state;
1121 		}
1122 		mutex_exit(&vswp->hw_lock);
1123 	} else {
1124 		return (EACCES);
1125 	}
1126 
1127 	return (0);
1128 }
1129