xref: /titanic_51/usr/src/uts/sun4v/io/vnet.c (revision a5669307eaef64af8519feb70d42f0aa0e7ec21a)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #include <sys/types.h>
28 #include <sys/errno.h>
29 #include <sys/param.h>
30 #include <sys/stream.h>
31 #include <sys/kmem.h>
32 #include <sys/conf.h>
33 #include <sys/devops.h>
34 #include <sys/ksynch.h>
35 #include <sys/stat.h>
36 #include <sys/modctl.h>
37 #include <sys/modhash.h>
38 #include <sys/debug.h>
39 #include <sys/ethernet.h>
40 #include <sys/dlpi.h>
41 #include <net/if.h>
42 #include <sys/mac_provider.h>
43 #include <sys/mac_ether.h>
44 #include <sys/ddi.h>
45 #include <sys/sunddi.h>
46 #include <sys/strsun.h>
47 #include <sys/note.h>
48 #include <sys/atomic.h>
49 #include <sys/vnet.h>
50 #include <sys/vlan.h>
51 #include <sys/vnet_mailbox.h>
52 #include <sys/vnet_common.h>
53 #include <sys/dds.h>
54 #include <sys/strsubr.h>
55 #include <sys/taskq.h>
56 
57 /*
58  * Function prototypes.
59  */
60 
61 /* DDI entrypoints */
62 static int vnetdevinfo(dev_info_t *, ddi_info_cmd_t, void *, void **);
63 static int vnetattach(dev_info_t *, ddi_attach_cmd_t);
64 static int vnetdetach(dev_info_t *, ddi_detach_cmd_t);
65 
66 /* MAC entrypoints  */
67 static int vnet_m_stat(void *, uint_t, uint64_t *);
68 static int vnet_m_start(void *);
69 static void vnet_m_stop(void *);
70 static int vnet_m_promisc(void *, boolean_t);
71 static int vnet_m_multicst(void *, boolean_t, const uint8_t *);
72 static int vnet_m_unicst(void *, const uint8_t *);
73 mblk_t *vnet_m_tx(void *, mblk_t *);
74 
75 /* vnet internal functions */
76 static int vnet_unattach(vnet_t *vnetp);
77 static int vnet_mac_register(vnet_t *);
78 static int vnet_read_mac_address(vnet_t *vnetp);
79 
80 /* Forwarding database (FDB) routines */
81 static void vnet_fdb_create(vnet_t *vnetp);
82 static void vnet_fdb_destroy(vnet_t *vnetp);
83 static vnet_res_t *vnet_fdbe_find(vnet_t *vnetp, struct ether_addr *addrp);
84 static void vnet_fdbe_find_cb(mod_hash_key_t key, mod_hash_val_t val);
85 void vnet_fdbe_add(vnet_t *vnetp, vnet_res_t *vresp);
86 static void vnet_fdbe_del(vnet_t *vnetp, vnet_res_t *vresp);
87 
88 static void vnet_rx_frames_untag(uint16_t pvid, mblk_t **mp);
89 static void vnet_rx(vio_net_handle_t vrh, mblk_t *mp);
90 static void vnet_tx_update(vio_net_handle_t vrh);
91 static void vnet_res_start_task(void *arg);
92 static void vnet_start_resources(vnet_t *vnetp);
93 static void vnet_stop_resources(vnet_t *vnetp);
94 static void vnet_dispatch_res_task(vnet_t *vnetp);
95 static void vnet_res_start_task(void *arg);
96 static void vnet_handle_res_err(vio_net_handle_t vrh, vio_net_err_val_t err);
97 int vnet_mtu_update(vnet_t *vnetp, uint32_t mtu);
98 
99 static kstat_t *vnet_hio_setup_kstats(char *ks_mod, char *ks_name,
100     vnet_res_t *vresp);
101 static int vnet_hio_update_kstats(kstat_t *ksp, int rw);
102 static void vnet_hio_get_stats(vnet_res_t *vresp, vnet_hio_stats_t *statsp);
103 static void vnet_hio_destroy_kstats(kstat_t *ksp);
104 
/* Exported to vnet_dds */
106 int vnet_send_dds_msg(vnet_t *vnetp, void *dmsg);
107 
108 /* Externs that are imported from vnet_gen */
109 extern int vgen_init(void *vnetp, uint64_t regprop, dev_info_t *vnetdip,
110     const uint8_t *macaddr, void **vgenhdl);
111 extern int vgen_uninit(void *arg);
112 extern int vgen_dds_tx(void *arg, void *dmsg);
113 extern void vgen_mod_init(void);
114 extern int vgen_mod_cleanup(void);
115 extern void vgen_mod_fini(void);
116 
117 /* Externs that are imported from vnet_dds */
118 extern void vdds_mod_init(void);
119 extern void vdds_mod_fini(void);
120 extern int vdds_init(vnet_t *vnetp);
121 extern void vdds_cleanup(vnet_t *vnetp);
122 extern void vdds_process_dds_msg(vnet_t *vnetp, vio_dds_msg_t *dmsg);
123 extern void vdds_cleanup_hybrid_res(void *arg);
124 
125 #define	DRV_NAME	"vnet"
/*
 * Take a reference on an fdb entry: atomically increment its refcnt and
 * assert that the counter did not wrap around to zero.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to a
 * single statement; "VNET_FDBE_REFHOLD(p);" can then be used as the body of
 * an unbraced if/else without the trailing semicolon ending the if early.
 * Note that 'p' is evaluated more than once.
 */
#define	VNET_FDBE_REFHOLD(p)						\
do {									\
	atomic_inc_32(&(p)->refcnt);					\
	ASSERT((p)->refcnt != 0);					\
_NOTE(CONSTCOND) } while (0)
131 
/*
 * Drop a reference on an fdb entry: assert that a reference is actually
 * held (refcnt is non-zero) and then atomically decrement refcnt.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to a
 * single statement; "VNET_FDBE_REFRELE(p);" can then be used as the body of
 * an unbraced if/else without the trailing semicolon ending the if early.
 * Note that 'p' is evaluated more than once.
 */
#define	VNET_FDBE_REFRELE(p)						\
do {									\
	ASSERT((p)->refcnt != 0);					\
	atomic_dec_32(&(p)->refcnt);					\
_NOTE(CONSTCOND) } while (0)
137 
138 static mac_callbacks_t vnet_m_callbacks = {
139 	0,
140 	vnet_m_stat,
141 	vnet_m_start,
142 	vnet_m_stop,
143 	vnet_m_promisc,
144 	vnet_m_multicst,
145 	vnet_m_unicst,
146 	vnet_m_tx,
147 	NULL,
148 	NULL,
149 	NULL
150 };
151 
152 /*
153  * Linked list of "vnet_t" structures - one per instance.
154  */
155 static vnet_t	*vnet_headp = NULL;
156 static krwlock_t vnet_rw;
157 
158 /* Tunables */
159 uint32_t vnet_ntxds = VNET_NTXDS;	/* power of 2 transmit descriptors */
160 uint32_t vnet_ldcwd_interval = VNET_LDCWD_INTERVAL; /* watchdog freq in msec */
161 uint32_t vnet_ldcwd_txtimeout = VNET_LDCWD_TXTIMEOUT;  /* tx timeout in msec */
162 uint32_t vnet_ldc_mtu = VNET_LDC_MTU;		/* ldc mtu */
163 
164 /*
165  * Set this to non-zero to enable additional internal receive buffer pools
166  * based on the MTU of the device for better performance at the cost of more
167  * memory consumption. This is turned off by default, to use allocb(9F) for
168  * receive buffer allocations of sizes > 2K.
169  */
170 boolean_t vnet_jumbo_rxpools = B_FALSE;
171 
172 /* # of chains in fdb hash table */
173 uint32_t	vnet_fdb_nchains = VNET_NFDB_HASH;
174 
175 /* Internal tunables */
176 uint32_t	vnet_ethermtu = 1500;	/* mtu of the device */
177 
178 /*
179  * Default vlan id. This is only used internally when the "default-vlan-id"
180  * property is not present in the MD device node. Therefore, this should not be
181  * used as a tunable; if this value is changed, the corresponding variable
182  * should be updated to the same value in vsw and also other vnets connected to
183  * the same vsw.
184  */
185 uint16_t	vnet_default_vlan_id = 1;
186 
187 /* delay in usec to wait for all references on a fdb entry to be dropped */
188 uint32_t vnet_fdbe_refcnt_delay = 10;
189 
190 static struct ether_addr etherbroadcastaddr = {
191 	0xff, 0xff, 0xff, 0xff, 0xff, 0xff
192 };
193 
194 
195 /*
196  * Property names
197  */
198 static char macaddr_propname[] = "local-mac-address";
199 
200 /*
201  * This is the string displayed by modinfo(1m).
202  */
203 static char vnet_ident[] = "vnet driver";
204 extern struct mod_ops mod_driverops;
205 static struct cb_ops cb_vnetops = {
206 	nulldev,		/* cb_open */
207 	nulldev,		/* cb_close */
208 	nodev,			/* cb_strategy */
209 	nodev,			/* cb_print */
210 	nodev,			/* cb_dump */
211 	nodev,			/* cb_read */
212 	nodev,			/* cb_write */
213 	nodev,			/* cb_ioctl */
214 	nodev,			/* cb_devmap */
215 	nodev,			/* cb_mmap */
216 	nodev,			/* cb_segmap */
217 	nochpoll,		/* cb_chpoll */
218 	ddi_prop_op,		/* cb_prop_op */
219 	NULL,			/* cb_stream */
220 	(int)(D_MP)		/* cb_flag */
221 };
222 
223 static struct dev_ops vnetops = {
224 	DEVO_REV,		/* devo_rev */
225 	0,			/* devo_refcnt */
226 	NULL,			/* devo_getinfo */
227 	nulldev,		/* devo_identify */
228 	nulldev,		/* devo_probe */
229 	vnetattach,		/* devo_attach */
230 	vnetdetach,		/* devo_detach */
231 	nodev,			/* devo_reset */
232 	&cb_vnetops,		/* devo_cb_ops */
233 	(struct bus_ops *)NULL,	/* devo_bus_ops */
234 	NULL,			/* devo_power */
235 	ddi_quiesce_not_supported,	/* devo_quiesce */
236 };
237 
238 static struct modldrv modldrv = {
239 	&mod_driverops,		/* Type of module.  This one is a driver */
240 	vnet_ident,		/* ID string */
241 	&vnetops		/* driver specific ops */
242 };
243 
244 static struct modlinkage modlinkage = {
245 	MODREV_1, (void *)&modldrv, NULL
246 };
247 
248 #ifdef DEBUG
249 
250 /*
251  * Print debug messages - set to 0xf to enable all msgs
252  */
253 int vnet_dbglevel = 0x8;
254 
255 static void
256 debug_printf(const char *fname, void *arg, const char *fmt, ...)
257 {
258 	char    buf[512];
259 	va_list ap;
260 	vnet_t *vnetp = (vnet_t *)arg;
261 	char    *bufp = buf;
262 
263 	if (vnetp == NULL) {
264 		(void) sprintf(bufp, "%s: ", fname);
265 		bufp += strlen(bufp);
266 	} else {
267 		(void) sprintf(bufp, "vnet%d:%s: ", vnetp->instance, fname);
268 		bufp += strlen(bufp);
269 	}
270 	va_start(ap, fmt);
271 	(void) vsprintf(bufp, fmt, ap);
272 	va_end(ap);
273 	cmn_err(CE_CONT, "%s\n", buf);
274 }
275 
276 #endif
277 
278 /* _init(9E): initialize the loadable module */
279 int
280 _init(void)
281 {
282 	int status;
283 
284 	DBG1(NULL, "enter\n");
285 
286 	mac_init_ops(&vnetops, "vnet");
287 	status = mod_install(&modlinkage);
288 	if (status != 0) {
289 		mac_fini_ops(&vnetops);
290 	}
291 	vdds_mod_init();
292 	vgen_mod_init();
293 	DBG1(NULL, "exit(%d)\n", status);
294 	return (status);
295 }
296 
297 /* _fini(9E): prepare the module for unloading. */
298 int
299 _fini(void)
300 {
301 	int		status;
302 
303 	DBG1(NULL, "enter\n");
304 
305 	status = vgen_mod_cleanup();
306 	if (status != 0)
307 		return (status);
308 
309 	status = mod_remove(&modlinkage);
310 	if (status != 0)
311 		return (status);
312 	mac_fini_ops(&vnetops);
313 	vgen_mod_fini();
314 	vdds_mod_fini();
315 
316 	DBG1(NULL, "exit(%d)\n", status);
317 	return (status);
318 }
319 
320 /* _info(9E): return information about the loadable module */
321 int
322 _info(struct modinfo *modinfop)
323 {
324 	return (mod_info(&modlinkage, modinfop));
325 }
326 
327 /*
328  * attach(9E): attach a device to the system.
329  * called once for each instance of the device on the system.
330  */
331 static int
332 vnetattach(dev_info_t *dip, ddi_attach_cmd_t cmd)
333 {
334 	vnet_t			*vnetp;
335 	int			status;
336 	int			instance;
337 	uint64_t		reg;
338 	char			qname[TASKQ_NAMELEN];
339 	vnet_attach_progress_t	attach_progress;
340 
341 	attach_progress = AST_init;
342 
343 	switch (cmd) {
344 	case DDI_ATTACH:
345 		break;
346 	case DDI_RESUME:
347 	case DDI_PM_RESUME:
348 	default:
349 		goto vnet_attach_fail;
350 	}
351 
352 	instance = ddi_get_instance(dip);
353 	DBG1(NULL, "instance(%d) enter\n", instance);
354 
355 	/* allocate vnet_t and mac_t structures */
356 	vnetp = kmem_zalloc(sizeof (vnet_t), KM_SLEEP);
357 	vnetp->dip = dip;
358 	vnetp->instance = instance;
359 	rw_init(&vnetp->vrwlock, NULL, RW_DRIVER, NULL);
360 	rw_init(&vnetp->vsw_fp_rw, NULL, RW_DRIVER, NULL);
361 	attach_progress |= AST_vnet_alloc;
362 
363 	status = vdds_init(vnetp);
364 	if (status != 0) {
365 		goto vnet_attach_fail;
366 	}
367 	attach_progress |= AST_vdds_init;
368 
369 	/* setup links to vnet_t from both devinfo and mac_t */
370 	ddi_set_driver_private(dip, (caddr_t)vnetp);
371 
372 	/* read the mac address */
373 	status = vnet_read_mac_address(vnetp);
374 	if (status != DDI_SUCCESS) {
375 		goto vnet_attach_fail;
376 	}
377 	attach_progress |= AST_read_macaddr;
378 
379 	reg = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
380 	    DDI_PROP_DONTPASS, "reg", -1);
381 	if (reg == -1) {
382 		goto vnet_attach_fail;
383 	}
384 	vnetp->reg = reg;
385 
386 	vnet_fdb_create(vnetp);
387 	attach_progress |= AST_fdbh_alloc;
388 
389 	(void) snprintf(qname, TASKQ_NAMELEN, "vnet_taskq%d", instance);
390 	if ((vnetp->taskqp = ddi_taskq_create(dip, qname, 1,
391 	    TASKQ_DEFAULTPRI, 0)) == NULL) {
392 		cmn_err(CE_WARN, "!vnet%d: Unable to create task queue",
393 		    instance);
394 		goto vnet_attach_fail;
395 	}
396 	attach_progress |= AST_taskq_create;
397 
398 	/* add to the list of vnet devices */
399 	WRITE_ENTER(&vnet_rw);
400 	vnetp->nextp = vnet_headp;
401 	vnet_headp = vnetp;
402 	RW_EXIT(&vnet_rw);
403 
404 	attach_progress |= AST_vnet_list;
405 
406 	/*
407 	 * Initialize the generic vnet plugin which provides
408 	 * communication via sun4v LDC (logical domain channel) based
409 	 * resources. It will register the LDC resources as and when
410 	 * they become available.
411 	 */
412 	status = vgen_init(vnetp, reg, vnetp->dip,
413 	    (uint8_t *)vnetp->curr_macaddr, &vnetp->vgenhdl);
414 	if (status != DDI_SUCCESS) {
415 		DERR(vnetp, "vgen_init() failed\n");
416 		goto vnet_attach_fail;
417 	}
418 	attach_progress |= AST_vgen_init;
419 
420 	/* register with MAC layer */
421 	status = vnet_mac_register(vnetp);
422 	if (status != DDI_SUCCESS) {
423 		goto vnet_attach_fail;
424 	}
425 
426 	attach_progress |= AST_macreg;
427 
428 	vnetp->attach_progress = attach_progress;
429 
430 	DBG1(NULL, "instance(%d) exit\n", instance);
431 	return (DDI_SUCCESS);
432 
433 vnet_attach_fail:
434 	vnetp->attach_progress = attach_progress;
435 	vnet_unattach(vnetp);
436 	return (DDI_FAILURE);
437 }
438 
439 /*
440  * detach(9E): detach a device from the system.
441  */
442 static int
443 vnetdetach(dev_info_t *dip, ddi_detach_cmd_t cmd)
444 {
445 	vnet_t		*vnetp;
446 	int		instance;
447 
448 	instance = ddi_get_instance(dip);
449 	DBG1(NULL, "instance(%d) enter\n", instance);
450 
451 	vnetp = ddi_get_driver_private(dip);
452 	if (vnetp == NULL) {
453 		goto vnet_detach_fail;
454 	}
455 
456 	switch (cmd) {
457 	case DDI_DETACH:
458 		break;
459 	case DDI_SUSPEND:
460 	case DDI_PM_SUSPEND:
461 	default:
462 		goto vnet_detach_fail;
463 	}
464 
465 	if (vnet_unattach(vnetp) != 0) {
466 		goto vnet_detach_fail;
467 	}
468 
469 	return (DDI_SUCCESS);
470 
471 vnet_detach_fail:
472 	return (DDI_FAILURE);
473 }
474 
475 /*
476  * Common routine to handle vnetattach() failure and vnetdetach(). Note that
477  * the only reason this function could fail is if mac_unregister() fails.
478  * Otherwise, this function must ensure that all resources are freed and return
479  * success.
480  */
481 static int
482 vnet_unattach(vnet_t *vnetp)
483 {
484 	vnet_attach_progress_t	attach_progress;
485 
486 	attach_progress = vnetp->attach_progress;
487 
488 	/*
489 	 * Unregister from the gldv3 subsystem. This can fail, in particular
490 	 * if there are still any open references to this mac device; in which
491 	 * case we just return failure without continuing to detach further.
492 	 */
493 	if (attach_progress & AST_macreg) {
494 		if (mac_unregister(vnetp->mh) != 0) {
495 			return (1);
496 		}
497 		attach_progress &= ~AST_macreg;
498 	}
499 
500 	/*
501 	 * Now that we have unregistered from gldv3, we must finish all other
502 	 * steps and successfully return from this function; otherwise we will
503 	 * end up leaving the device in a broken/unusable state.
504 	 *
505 	 * First, release any hybrid resources assigned to this vnet device.
506 	 */
507 	if (attach_progress & AST_vdds_init) {
508 		vdds_cleanup(vnetp);
509 		attach_progress &= ~AST_vdds_init;
510 	}
511 
512 	/*
513 	 * Uninit vgen. This stops further mdeg callbacks to this vnet
514 	 * device and/or its ports; and detaches any existing ports.
515 	 */
516 	if (attach_progress & AST_vgen_init) {
517 		vgen_uninit(vnetp->vgenhdl);
518 		attach_progress &= ~AST_vgen_init;
519 	}
520 
521 	/* Destroy the taskq. */
522 	if (attach_progress & AST_taskq_create) {
523 		ddi_taskq_destroy(vnetp->taskqp);
524 		attach_progress &= ~AST_taskq_create;
525 	}
526 
527 	/* Destroy fdb. */
528 	if (attach_progress & AST_fdbh_alloc) {
529 		vnet_fdb_destroy(vnetp);
530 		attach_progress &= ~AST_fdbh_alloc;
531 	}
532 
533 	/* Remove from the device list */
534 	if (attach_progress & AST_vnet_list) {
535 		vnet_t		**vnetpp;
536 		/* unlink from instance(vnet_t) list */
537 		WRITE_ENTER(&vnet_rw);
538 		for (vnetpp = &vnet_headp; *vnetpp;
539 		    vnetpp = &(*vnetpp)->nextp) {
540 			if (*vnetpp == vnetp) {
541 				*vnetpp = vnetp->nextp;
542 				break;
543 			}
544 		}
545 		RW_EXIT(&vnet_rw);
546 		attach_progress &= ~AST_vnet_list;
547 	}
548 
549 	if (attach_progress & AST_vnet_alloc) {
550 		rw_destroy(&vnetp->vrwlock);
551 		rw_destroy(&vnetp->vsw_fp_rw);
552 		attach_progress &= ~AST_vnet_list;
553 		KMEM_FREE(vnetp);
554 	}
555 
556 	return (0);
557 }
558 
559 /* enable the device for transmit/receive */
560 static int
561 vnet_m_start(void *arg)
562 {
563 	vnet_t		*vnetp = arg;
564 
565 	DBG1(vnetp, "enter\n");
566 
567 	WRITE_ENTER(&vnetp->vrwlock);
568 	vnetp->flags |= VNET_STARTED;
569 	vnet_start_resources(vnetp);
570 	RW_EXIT(&vnetp->vrwlock);
571 
572 	DBG1(vnetp, "exit\n");
573 	return (VNET_SUCCESS);
574 
575 }
576 
577 /* stop transmit/receive for the device */
578 static void
579 vnet_m_stop(void *arg)
580 {
581 	vnet_t		*vnetp = arg;
582 
583 	DBG1(vnetp, "enter\n");
584 
585 	WRITE_ENTER(&vnetp->vrwlock);
586 	if (vnetp->flags & VNET_STARTED) {
587 		vnet_stop_resources(vnetp);
588 		vnetp->flags &= ~VNET_STARTED;
589 	}
590 	RW_EXIT(&vnetp->vrwlock);
591 
592 	DBG1(vnetp, "exit\n");
593 }
594 
595 /* set the unicast mac address of the device */
596 static int
597 vnet_m_unicst(void *arg, const uint8_t *macaddr)
598 {
599 	_NOTE(ARGUNUSED(macaddr))
600 
601 	vnet_t *vnetp = arg;
602 
603 	DBG1(vnetp, "enter\n");
604 	/*
605 	 * NOTE: setting mac address dynamically is not supported.
606 	 */
607 	DBG1(vnetp, "exit\n");
608 
609 	return (VNET_FAILURE);
610 }
611 
612 /* enable/disable a multicast address */
613 static int
614 vnet_m_multicst(void *arg, boolean_t add, const uint8_t *mca)
615 {
616 	_NOTE(ARGUNUSED(add, mca))
617 
618 	vnet_t *vnetp = arg;
619 	vnet_res_t	*vresp;
620 	mac_register_t	*macp;
621 	mac_callbacks_t	*cbp;
622 	int rv = VNET_SUCCESS;
623 
624 	DBG1(vnetp, "enter\n");
625 
626 	READ_ENTER(&vnetp->vrwlock);
627 	for (vresp = vnetp->vres_list; vresp != NULL; vresp = vresp->nextp) {
628 		if (vresp->type == VIO_NET_RES_LDC_SERVICE) {
629 			macp = &vresp->macreg;
630 			cbp = macp->m_callbacks;
631 			rv = cbp->mc_multicst(macp->m_driver, add, mca);
632 		}
633 	}
634 	RW_EXIT(&vnetp->vrwlock);
635 
636 	DBG1(vnetp, "exit(%d)\n", rv);
637 	return (rv);
638 }
639 
640 /* set or clear promiscuous mode on the device */
641 static int
642 vnet_m_promisc(void *arg, boolean_t on)
643 {
644 	_NOTE(ARGUNUSED(on))
645 
646 	vnet_t *vnetp = arg;
647 	DBG1(vnetp, "enter\n");
648 	/*
649 	 * NOTE: setting promiscuous mode is not supported, just return success.
650 	 */
651 	DBG1(vnetp, "exit\n");
652 	return (VNET_SUCCESS);
653 }
654 
655 /*
656  * Transmit a chain of packets. This function provides switching functionality
657  * based on the destination mac address to reach other guests (within ldoms) or
658  * external hosts.
659  */
660 mblk_t *
661 vnet_m_tx(void *arg, mblk_t *mp)
662 {
663 	vnet_t			*vnetp;
664 	vnet_res_t		*vresp;
665 	mblk_t			*next;
666 	mblk_t			*resid_mp;
667 	mac_register_t		*macp;
668 	struct ether_header	*ehp;
669 	boolean_t		is_unicast;
670 	boolean_t		is_pvid;	/* non-default pvid ? */
671 	boolean_t		hres;		/* Hybrid resource ? */
672 
673 	vnetp = (vnet_t *)arg;
674 	DBG1(vnetp, "enter\n");
675 	ASSERT(mp != NULL);
676 
677 	is_pvid = (vnetp->pvid != vnetp->default_vlan_id) ? B_TRUE : B_FALSE;
678 
679 	while (mp != NULL) {
680 
681 		next = mp->b_next;
682 		mp->b_next = NULL;
683 
684 		/*
685 		 * Find fdb entry for the destination
686 		 * and hold a reference to it.
687 		 */
688 		ehp = (struct ether_header *)mp->b_rptr;
689 		vresp = vnet_fdbe_find(vnetp, &ehp->ether_dhost);
690 		if (vresp != NULL) {
691 
692 			/*
693 			 * Destination found in FDB.
694 			 * The destination is a vnet device within ldoms
695 			 * and directly reachable, invoke the tx function
696 			 * in the fdb entry.
697 			 */
698 			macp = &vresp->macreg;
699 			resid_mp = macp->m_callbacks->mc_tx(macp->m_driver, mp);
700 
701 			/* tx done; now release ref on fdb entry */
702 			VNET_FDBE_REFRELE(vresp);
703 
704 			if (resid_mp != NULL) {
705 				/* m_tx failed */
706 				mp->b_next = next;
707 				break;
708 			}
709 		} else {
710 			is_unicast = !(IS_BROADCAST(ehp) ||
711 			    (IS_MULTICAST(ehp)));
712 			/*
713 			 * Destination is not in FDB.
714 			 * If the destination is broadcast or multicast,
715 			 * then forward the packet to vswitch.
716 			 * If a Hybrid resource avilable, then send the
717 			 * unicast packet via hybrid resource, otherwise
718 			 * forward it to vswitch.
719 			 */
720 			READ_ENTER(&vnetp->vsw_fp_rw);
721 
722 			if ((is_unicast) && (vnetp->hio_fp != NULL)) {
723 				vresp = vnetp->hio_fp;
724 				hres = B_TRUE;
725 			} else {
726 				vresp = vnetp->vsw_fp;
727 				hres = B_FALSE;
728 			}
729 			if (vresp == NULL) {
730 				/*
731 				 * no fdb entry to vsw? drop the packet.
732 				 */
733 				RW_EXIT(&vnetp->vsw_fp_rw);
734 				freemsg(mp);
735 				mp = next;
736 				continue;
737 			}
738 
739 			/* ref hold the fdb entry to vsw */
740 			VNET_FDBE_REFHOLD(vresp);
741 
742 			RW_EXIT(&vnetp->vsw_fp_rw);
743 
744 			/*
745 			 * In the case of a hybrid resource we need to insert
746 			 * the tag for the pvid case here; unlike packets that
747 			 * are destined to a vnet/vsw in which case the vgen
748 			 * layer does the tagging before sending it over ldc.
749 			 */
750 			if (hres == B_TRUE) {
751 				/*
752 				 * Determine if the frame being transmitted
753 				 * over the hybrid resource is untagged. If so,
754 				 * insert the tag before transmitting.
755 				 */
756 				if (is_pvid == B_TRUE &&
757 				    ehp->ether_type != htons(ETHERTYPE_VLAN)) {
758 
759 					mp = vnet_vlan_insert_tag(mp,
760 					    vnetp->pvid);
761 					if (mp == NULL) {
762 						VNET_FDBE_REFRELE(vresp);
763 						mp = next;
764 						continue;
765 					}
766 
767 				}
768 			}
769 
770 			macp = &vresp->macreg;
771 			resid_mp = macp->m_callbacks->mc_tx(macp->m_driver, mp);
772 
773 			/* tx done; now release ref on fdb entry */
774 			VNET_FDBE_REFRELE(vresp);
775 
776 			if (resid_mp != NULL) {
777 				/* m_tx failed */
778 				mp->b_next = next;
779 				break;
780 			}
781 		}
782 
783 		mp = next;
784 	}
785 
786 	DBG1(vnetp, "exit\n");
787 	return (mp);
788 }
789 
790 /* get statistics from the device */
791 int
792 vnet_m_stat(void *arg, uint_t stat, uint64_t *val)
793 {
794 	vnet_t *vnetp = arg;
795 	vnet_res_t	*vresp;
796 	mac_register_t	*macp;
797 	mac_callbacks_t	*cbp;
798 	uint64_t val_total = 0;
799 
800 	DBG1(vnetp, "enter\n");
801 
802 	/*
803 	 * get the specified statistic from each transport and return the
804 	 * aggregate val.  This obviously only works for counters.
805 	 */
806 	if ((IS_MAC_STAT(stat) && !MAC_STAT_ISACOUNTER(stat)) ||
807 	    (IS_MACTYPE_STAT(stat) && !ETHER_STAT_ISACOUNTER(stat))) {
808 		return (ENOTSUP);
809 	}
810 
811 	READ_ENTER(&vnetp->vrwlock);
812 	for (vresp = vnetp->vres_list; vresp != NULL; vresp = vresp->nextp) {
813 		macp = &vresp->macreg;
814 		cbp = macp->m_callbacks;
815 		if (cbp->mc_getstat(macp->m_driver, stat, val) == 0)
816 			val_total += *val;
817 	}
818 	RW_EXIT(&vnetp->vrwlock);
819 
820 	*val = val_total;
821 
822 	DBG1(vnetp, "exit\n");
823 	return (0);
824 }
825 
826 /* wrapper function for mac_register() */
827 static int
828 vnet_mac_register(vnet_t *vnetp)
829 {
830 	mac_register_t	*macp;
831 	int		err;
832 
833 	if ((macp = mac_alloc(MAC_VERSION)) == NULL)
834 		return (DDI_FAILURE);
835 	macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
836 	macp->m_driver = vnetp;
837 	macp->m_dip = vnetp->dip;
838 	macp->m_src_addr = vnetp->curr_macaddr;
839 	macp->m_callbacks = &vnet_m_callbacks;
840 	macp->m_min_sdu = 0;
841 	macp->m_max_sdu = vnetp->mtu;
842 	macp->m_margin = VLAN_TAGSZ;
843 
844 	/*
845 	 * Finally, we're ready to register ourselves with the MAC layer
846 	 * interface; if this succeeds, we're all ready to start()
847 	 */
848 	err = mac_register(macp, &vnetp->mh);
849 	mac_free(macp);
850 	return (err == 0 ? DDI_SUCCESS : DDI_FAILURE);
851 }
852 
853 /* read the mac address of the device */
854 static int
855 vnet_read_mac_address(vnet_t *vnetp)
856 {
857 	uchar_t 	*macaddr;
858 	uint32_t 	size;
859 	int 		rv;
860 
861 	rv = ddi_prop_lookup_byte_array(DDI_DEV_T_ANY, vnetp->dip,
862 	    DDI_PROP_DONTPASS, macaddr_propname, &macaddr, &size);
863 	if ((rv != DDI_PROP_SUCCESS) || (size != ETHERADDRL)) {
864 		DWARN(vnetp, "prop_lookup failed(%s) err(%d)\n",
865 		    macaddr_propname, rv);
866 		return (DDI_FAILURE);
867 	}
868 	bcopy(macaddr, (caddr_t)vnetp->vendor_addr, ETHERADDRL);
869 	bcopy(macaddr, (caddr_t)vnetp->curr_macaddr, ETHERADDRL);
870 	ddi_prop_free(macaddr);
871 
872 	return (DDI_SUCCESS);
873 }
874 
875 static void
876 vnet_fdb_create(vnet_t *vnetp)
877 {
878 	char		hashname[MAXNAMELEN];
879 
880 	(void) snprintf(hashname, MAXNAMELEN, "vnet%d-fdbhash",
881 	    vnetp->instance);
882 	vnetp->fdb_nchains = vnet_fdb_nchains;
883 	vnetp->fdb_hashp = mod_hash_create_ptrhash(hashname, vnetp->fdb_nchains,
884 	    mod_hash_null_valdtor, sizeof (void *));
885 }
886 
887 static void
888 vnet_fdb_destroy(vnet_t *vnetp)
889 {
890 	/* destroy fdb-hash-table */
891 	if (vnetp->fdb_hashp != NULL) {
892 		mod_hash_destroy_hash(vnetp->fdb_hashp);
893 		vnetp->fdb_hashp = NULL;
894 		vnetp->fdb_nchains = 0;
895 	}
896 }
897 
898 /*
899  * Add an entry into the fdb.
900  */
901 void
902 vnet_fdbe_add(vnet_t *vnetp, vnet_res_t *vresp)
903 {
904 	uint64_t	addr = 0;
905 	int		rv;
906 
907 	KEY_HASH(addr, vresp->rem_macaddr);
908 
909 	/*
910 	 * If the entry being added corresponds to LDC_SERVICE resource,
911 	 * that is, vswitch connection, it is added to the hash and also
912 	 * the entry is cached, an additional reference count reflects
913 	 * this. The HYBRID resource is not added to the hash, but only
914 	 * cached, as it is only used for sending out packets for unknown
915 	 * unicast destinations.
916 	 */
917 	(vresp->type == VIO_NET_RES_LDC_SERVICE) ?
918 	    (vresp->refcnt = 1) : (vresp->refcnt = 0);
919 
920 	/*
921 	 * Note: duplicate keys will be rejected by mod_hash.
922 	 */
923 	if (vresp->type != VIO_NET_RES_HYBRID) {
924 		rv = mod_hash_insert(vnetp->fdb_hashp, (mod_hash_key_t)addr,
925 		    (mod_hash_val_t)vresp);
926 		if (rv != 0) {
927 			DWARN(vnetp, "Duplicate macaddr key(%lx)\n", addr);
928 			return;
929 		}
930 	}
931 
932 	if (vresp->type == VIO_NET_RES_LDC_SERVICE) {
933 		/* Cache the fdb entry to vsw-port */
934 		WRITE_ENTER(&vnetp->vsw_fp_rw);
935 		if (vnetp->vsw_fp == NULL)
936 			vnetp->vsw_fp = vresp;
937 		RW_EXIT(&vnetp->vsw_fp_rw);
938 	} else if (vresp->type == VIO_NET_RES_HYBRID) {
939 		/* Cache the fdb entry to hybrid resource */
940 		WRITE_ENTER(&vnetp->vsw_fp_rw);
941 		if (vnetp->hio_fp == NULL)
942 			vnetp->hio_fp = vresp;
943 		RW_EXIT(&vnetp->vsw_fp_rw);
944 	}
945 }
946 
947 /*
948  * Remove an entry from fdb.
949  */
950 static void
951 vnet_fdbe_del(vnet_t *vnetp, vnet_res_t *vresp)
952 {
953 	uint64_t	addr = 0;
954 	int		rv;
955 	uint32_t	refcnt;
956 	vnet_res_t	*tmp;
957 
958 	KEY_HASH(addr, vresp->rem_macaddr);
959 
960 	/*
961 	 * Remove the entry from fdb hash table.
962 	 * This prevents further references to this fdb entry.
963 	 */
964 	if (vresp->type != VIO_NET_RES_HYBRID) {
965 		rv = mod_hash_remove(vnetp->fdb_hashp, (mod_hash_key_t)addr,
966 		    (mod_hash_val_t *)&tmp);
967 		if (rv != 0) {
968 			/*
969 			 * As the resources are added to the hash only
970 			 * after they are started, this can occur if
971 			 * a resource unregisters before it is ever started.
972 			 */
973 			return;
974 		}
975 	}
976 
977 	if (vresp->type == VIO_NET_RES_LDC_SERVICE) {
978 		WRITE_ENTER(&vnetp->vsw_fp_rw);
979 
980 		ASSERT(tmp == vnetp->vsw_fp);
981 		vnetp->vsw_fp = NULL;
982 
983 		RW_EXIT(&vnetp->vsw_fp_rw);
984 	} else if (vresp->type == VIO_NET_RES_HYBRID) {
985 		WRITE_ENTER(&vnetp->vsw_fp_rw);
986 
987 		vnetp->hio_fp = NULL;
988 
989 		RW_EXIT(&vnetp->vsw_fp_rw);
990 	}
991 
992 	/*
993 	 * If there are threads already ref holding before the entry was
994 	 * removed from hash table, then wait for ref count to drop to zero.
995 	 */
996 	(vresp->type == VIO_NET_RES_LDC_SERVICE) ?
997 	    (refcnt = 1) : (refcnt = 0);
998 	while (vresp->refcnt > refcnt) {
999 		delay(drv_usectohz(vnet_fdbe_refcnt_delay));
1000 	}
1001 }
1002 
1003 /*
1004  * Search fdb for a given mac address. If an entry is found, hold
1005  * a reference to it and return the entry; else returns NULL.
1006  */
1007 static vnet_res_t *
1008 vnet_fdbe_find(vnet_t *vnetp, struct ether_addr *addrp)
1009 {
1010 	uint64_t	key = 0;
1011 	vnet_res_t	*vresp;
1012 	int		rv;
1013 
1014 	KEY_HASH(key, addrp->ether_addr_octet);
1015 
1016 	rv = mod_hash_find_cb(vnetp->fdb_hashp, (mod_hash_key_t)key,
1017 	    (mod_hash_val_t *)&vresp, vnet_fdbe_find_cb);
1018 
1019 	if (rv != 0)
1020 		return (NULL);
1021 
1022 	return (vresp);
1023 }
1024 
1025 /*
1026  * Callback function provided to mod_hash_find_cb(). After finding the fdb
1027  * entry corresponding to the key (macaddr), this callback will be invoked by
1028  * mod_hash_find_cb() to atomically increment the reference count on the fdb
1029  * entry before returning the found entry.
1030  */
1031 static void
1032 vnet_fdbe_find_cb(mod_hash_key_t key, mod_hash_val_t val)
1033 {
1034 	_NOTE(ARGUNUSED(key))
1035 	VNET_FDBE_REFHOLD((vnet_res_t *)val);
1036 }
1037 
1038 /*
1039  * Frames received that are tagged with the pvid of the vnet device must be
1040  * untagged before sending up the stack. This function walks the chain of rx
1041  * frames, untags any such frames and returns the updated chain.
1042  *
1043  * Arguments:
1044  *    pvid:  pvid of the vnet device for which packets are being received
1045  *    mp:    head of pkt chain to be validated and untagged
1046  *
1047  * Returns:
1048  *    mp:    head of updated chain of packets
1049  */
1050 static void
1051 vnet_rx_frames_untag(uint16_t pvid, mblk_t **mp)
1052 {
1053 	struct ether_vlan_header	*evhp;
1054 	mblk_t				*bp;
1055 	mblk_t				*bpt;
1056 	mblk_t				*bph;
1057 	mblk_t				*bpn;
1058 
1059 	bpn = bph = bpt = NULL;
1060 
1061 	for (bp = *mp; bp != NULL; bp = bpn) {
1062 
1063 		bpn = bp->b_next;
1064 		bp->b_next = bp->b_prev = NULL;
1065 
1066 		evhp = (struct ether_vlan_header *)bp->b_rptr;
1067 
1068 		if (ntohs(evhp->ether_tpid) == ETHERTYPE_VLAN &&
1069 		    VLAN_ID(ntohs(evhp->ether_tci)) == pvid) {
1070 
1071 			bp = vnet_vlan_remove_tag(bp);
1072 			if (bp == NULL) {
1073 				continue;
1074 			}
1075 
1076 		}
1077 
1078 		/* build a chain of processed packets */
1079 		if (bph == NULL) {
1080 			bph = bpt = bp;
1081 		} else {
1082 			bpt->b_next = bp;
1083 			bpt = bp;
1084 		}
1085 
1086 	}
1087 
1088 	*mp = bph;
1089 }
1090 
1091 static void
1092 vnet_rx(vio_net_handle_t vrh, mblk_t *mp)
1093 {
1094 	vnet_res_t	*vresp = (vnet_res_t *)vrh;
1095 	vnet_t		*vnetp = vresp->vnetp;
1096 
1097 	if ((vnetp == NULL) || (vnetp->mh == 0)) {
1098 		freemsgchain(mp);
1099 		return;
1100 	}
1101 
1102 	/*
1103 	 * Packets received over a hybrid resource need additional processing
1104 	 * to remove the tag, for the pvid case. The underlying resource is
1105 	 * not aware of the vnet's pvid and thus packets are received with the
1106 	 * vlan tag in the header; unlike packets that are received over a ldc
1107 	 * channel in which case the peer vnet/vsw would have already removed
1108 	 * the tag.
1109 	 */
1110 	if (vresp->type == VIO_NET_RES_HYBRID &&
1111 	    vnetp->pvid != vnetp->default_vlan_id) {
1112 
1113 		vnet_rx_frames_untag(vnetp->pvid, &mp);
1114 		if (mp == NULL) {
1115 			return;
1116 		}
1117 	}
1118 
1119 	mac_rx(vnetp->mh, NULL, mp);
1120 }
1121 
1122 void
1123 vnet_tx_update(vio_net_handle_t vrh)
1124 {
1125 	vnet_res_t *vresp = (vnet_res_t *)vrh;
1126 	vnet_t *vnetp = vresp->vnetp;
1127 
1128 	if ((vnetp != NULL) && (vnetp->mh != NULL)) {
1129 		mac_tx_update(vnetp->mh);
1130 	}
1131 }
1132 
1133 /*
1134  * Update the new mtu of vnet into the mac layer. First check if the device has
1135  * been plumbed and if so fail the mtu update. Returns 0 on success.
1136  */
1137 int
1138 vnet_mtu_update(vnet_t *vnetp, uint32_t mtu)
1139 {
1140 	int	rv;
1141 
1142 	if (vnetp == NULL || vnetp->mh == NULL) {
1143 		return (EINVAL);
1144 	}
1145 
1146 	WRITE_ENTER(&vnetp->vrwlock);
1147 
1148 	if (vnetp->flags & VNET_STARTED) {
1149 		RW_EXIT(&vnetp->vrwlock);
1150 		cmn_err(CE_NOTE, "!vnet%d: Unable to process mtu "
1151 		    "update as the device is plumbed\n",
1152 		    vnetp->instance);
1153 		return (EBUSY);
1154 	}
1155 
1156 	/* update mtu in the mac layer */
1157 	rv = mac_maxsdu_update(vnetp->mh, mtu);
1158 	if (rv != 0) {
1159 		RW_EXIT(&vnetp->vrwlock);
1160 		cmn_err(CE_NOTE,
1161 		    "!vnet%d: Unable to update mtu with mac layer\n",
1162 		    vnetp->instance);
1163 		return (EIO);
1164 	}
1165 
1166 	vnetp->mtu = mtu;
1167 
1168 	RW_EXIT(&vnetp->vrwlock);
1169 
1170 	return (0);
1171 }
1172 
1173 /*
1174  * vio_net_resource_reg -- An interface called to register a resource
1175  *	with vnet.
1176  *	macp -- a GLDv3 mac_register that has all the details of
1177  *		a resource and its callbacks etc.
1178  *	type -- resource type.
1179  *	local_macaddr -- resource's MAC address. This is used to
1180  *			 associate a resource with a corresponding vnet.
1181  *	remote_macaddr -- remote side MAC address. This is ignored for
1182  *			  the Hybrid resources.
1183  *	vhp -- A handle returned to the caller.
1184  *	vcb -- A set of callbacks provided to the callers.
1185  */
1186 int vio_net_resource_reg(mac_register_t *macp, vio_net_res_type_t type,
1187     ether_addr_t local_macaddr, ether_addr_t rem_macaddr, vio_net_handle_t *vhp,
1188     vio_net_callbacks_t *vcb)
1189 {
1190 	vnet_t	*vnetp;
1191 	vnet_res_t *vresp;
1192 
1193 	vresp = kmem_zalloc(sizeof (vnet_res_t), KM_SLEEP);
1194 	ether_copy(local_macaddr, vresp->local_macaddr);
1195 	ether_copy(rem_macaddr, vresp->rem_macaddr);
1196 	vresp->type = type;
1197 	bcopy(macp, &vresp->macreg, sizeof (mac_register_t));
1198 
1199 	DBG1(NULL, "Resource Registerig type=0%X\n", type);
1200 
1201 	READ_ENTER(&vnet_rw);
1202 	vnetp = vnet_headp;
1203 	while (vnetp != NULL) {
1204 		if (VNET_MATCH_RES(vresp, vnetp)) {
1205 			vresp->vnetp = vnetp;
1206 
1207 			/* Setup kstats for hio resource */
1208 			if (vresp->type == VIO_NET_RES_HYBRID) {
1209 				vresp->ksp = vnet_hio_setup_kstats(DRV_NAME,
1210 				    "hio", vresp);
1211 				if (vresp->ksp == NULL) {
1212 					cmn_err(CE_NOTE, "!vnet%d: Cannot "
1213 					    "create kstats for hio resource",
1214 					    vnetp->instance);
1215 				}
1216 			}
1217 
1218 			WRITE_ENTER(&vnetp->vrwlock);
1219 			vresp->nextp = vnetp->vres_list;
1220 			vnetp->vres_list = vresp;
1221 			RW_EXIT(&vnetp->vrwlock);
1222 			break;
1223 		}
1224 		vnetp = vnetp->nextp;
1225 	}
1226 	RW_EXIT(&vnet_rw);
1227 	if (vresp->vnetp == NULL) {
1228 		DWARN(NULL, "No vnet instance");
1229 		kmem_free(vresp, sizeof (vnet_res_t));
1230 		return (ENXIO);
1231 	}
1232 
1233 	*vhp = vresp;
1234 	vcb->vio_net_rx_cb = vnet_rx;
1235 	vcb->vio_net_tx_update = vnet_tx_update;
1236 	vcb->vio_net_report_err = vnet_handle_res_err;
1237 
1238 	/* Dispatch a task to start resources */
1239 	vnet_dispatch_res_task(vnetp);
1240 	return (0);
1241 }
1242 
1243 /*
1244  * vio_net_resource_unreg -- An interface to unregister a resource.
1245  */
1246 void
1247 vio_net_resource_unreg(vio_net_handle_t vhp)
1248 {
1249 	vnet_res_t *vresp = (vnet_res_t *)vhp;
1250 	vnet_t *vnetp = vresp->vnetp;
1251 	vnet_res_t *vrp;
1252 	kstat_t *ksp = NULL;
1253 
1254 	DBG1(NULL, "Resource Registerig hdl=0x%p", vhp);
1255 
1256 	ASSERT(vnetp != NULL);
1257 	vnet_fdbe_del(vnetp, vresp);
1258 
1259 	WRITE_ENTER(&vnetp->vrwlock);
1260 	if (vresp == vnetp->vres_list) {
1261 		vnetp->vres_list = vresp->nextp;
1262 	} else {
1263 		vrp = vnetp->vres_list;
1264 		while (vrp->nextp != NULL) {
1265 			if (vrp->nextp == vresp) {
1266 				vrp->nextp = vresp->nextp;
1267 				break;
1268 			}
1269 			vrp = vrp->nextp;
1270 		}
1271 	}
1272 
1273 	ksp = vresp->ksp;
1274 	vresp->ksp = NULL;
1275 
1276 	vresp->vnetp = NULL;
1277 	vresp->nextp = NULL;
1278 	RW_EXIT(&vnetp->vrwlock);
1279 	vnet_hio_destroy_kstats(ksp);
1280 	KMEM_FREE(vresp);
1281 }
1282 
1283 /*
1284  * vnet_dds_rx -- an interface called by vgen to DDS messages.
1285  */
1286 void
1287 vnet_dds_rx(void *arg, void *dmsg)
1288 {
1289 	vnet_t *vnetp = arg;
1290 	vdds_process_dds_msg(vnetp, dmsg);
1291 }
1292 
1293 /*
1294  * vnet_send_dds_msg -- An interface provided to DDS to send
1295  *	DDS messages. This simply sends meessages via vgen.
1296  */
1297 int
1298 vnet_send_dds_msg(vnet_t *vnetp, void *dmsg)
1299 {
1300 	int rv;
1301 
1302 	if (vnetp->vgenhdl != NULL) {
1303 		rv = vgen_dds_tx(vnetp->vgenhdl, dmsg);
1304 	}
1305 	return (rv);
1306 }
1307 
1308 /*
1309  * vnet_handle_res_err -- A callback function called by a resource
1310  *	to report an error. For example, vgen can call to report
1311  *	an LDC down/reset event. This will trigger cleanup of associated
1312  *	Hybrid resource.
1313  */
1314 /* ARGSUSED */
1315 static void
1316 vnet_handle_res_err(vio_net_handle_t vrh, vio_net_err_val_t err)
1317 {
1318 	vnet_res_t *vresp = (vnet_res_t *)vrh;
1319 	vnet_t *vnetp = vresp->vnetp;
1320 	int rv;
1321 
1322 	if (vnetp == NULL) {
1323 		return;
1324 	}
1325 	if ((vresp->type != VIO_NET_RES_LDC_SERVICE) &&
1326 	    (vresp->type != VIO_NET_RES_HYBRID)) {
1327 		return;
1328 	}
1329 	rv = ddi_taskq_dispatch(vnetp->taskqp, vdds_cleanup_hybrid_res,
1330 	    vnetp, DDI_NOSLEEP);
1331 	if (rv != DDI_SUCCESS) {
1332 		cmn_err(CE_WARN,
1333 		    "vnet%d:Failed to dispatch task to cleanup hybrid resource",
1334 		    vnetp->instance);
1335 	}
1336 }
1337 
1338 /*
1339  * vnet_dispatch_res_task -- A function to dispatch tasks start resources.
1340  */
1341 static void
1342 vnet_dispatch_res_task(vnet_t *vnetp)
1343 {
1344 	int rv;
1345 
1346 	WRITE_ENTER(&vnetp->vrwlock);
1347 	if (vnetp->flags & VNET_STARTED) {
1348 		rv = ddi_taskq_dispatch(vnetp->taskqp, vnet_res_start_task,
1349 		    vnetp, DDI_NOSLEEP);
1350 		if (rv != DDI_SUCCESS) {
1351 			cmn_err(CE_WARN,
1352 			    "vnet%d:Can't dispatch start resource task",
1353 			    vnetp->instance);
1354 		}
1355 	}
1356 	RW_EXIT(&vnetp->vrwlock);
1357 }
1358 
1359 /*
1360  * vnet_res_start_task -- A taskq callback function that starts a resource.
1361  */
1362 static void
1363 vnet_res_start_task(void *arg)
1364 {
1365 	vnet_t *vnetp = arg;
1366 
1367 	WRITE_ENTER(&vnetp->vrwlock);
1368 	if (vnetp->flags & VNET_STARTED) {
1369 		vnet_start_resources(vnetp);
1370 	}
1371 	RW_EXIT(&vnetp->vrwlock);
1372 }
1373 
1374 /*
1375  * vnet_start_resources -- starts all resources associated with
1376  *	a vnet.
1377  */
1378 static void
1379 vnet_start_resources(vnet_t *vnetp)
1380 {
1381 	mac_register_t	*macp;
1382 	mac_callbacks_t	*cbp;
1383 	vnet_res_t	*vresp;
1384 	int rv;
1385 
1386 	DBG1(vnetp, "enter\n");
1387 
1388 	for (vresp = vnetp->vres_list; vresp != NULL; vresp = vresp->nextp) {
1389 		/* skip if it is already started */
1390 		if (vresp->flags & VNET_STARTED) {
1391 			continue;
1392 		}
1393 		macp = &vresp->macreg;
1394 		cbp = macp->m_callbacks;
1395 		rv = cbp->mc_start(macp->m_driver);
1396 		if (rv == 0) {
1397 			/*
1398 			 * Successfully started the resource, so now
1399 			 * add it to the fdb.
1400 			 */
1401 			vresp->flags |= VNET_STARTED;
1402 			vnet_fdbe_add(vnetp, vresp);
1403 		}
1404 	}
1405 
1406 	DBG1(vnetp, "exit\n");
1407 
1408 }
1409 
1410 /*
1411  * vnet_stop_resources -- stop all resources associated with a vnet.
1412  */
1413 static void
1414 vnet_stop_resources(vnet_t *vnetp)
1415 {
1416 	vnet_res_t	*vresp;
1417 	vnet_res_t	*nvresp;
1418 	mac_register_t	*macp;
1419 	mac_callbacks_t	*cbp;
1420 
1421 	DBG1(vnetp, "enter\n");
1422 
1423 	for (vresp = vnetp->vres_list; vresp != NULL; ) {
1424 		nvresp = vresp->nextp;
1425 		if (vresp->flags & VNET_STARTED) {
1426 			macp = &vresp->macreg;
1427 			cbp = macp->m_callbacks;
1428 			cbp->mc_stop(macp->m_driver);
1429 			vresp->flags &= ~VNET_STARTED;
1430 		}
1431 		vresp = nvresp;
1432 	}
1433 	DBG1(vnetp, "exit\n");
1434 }
1435 
1436 /*
1437  * Setup kstats for the HIO statistics.
1438  * NOTE: the synchronization for the statistics is the
1439  * responsibility of the caller.
1440  */
1441 kstat_t *
1442 vnet_hio_setup_kstats(char *ks_mod, char *ks_name, vnet_res_t *vresp)
1443 {
1444 	kstat_t *ksp;
1445 	vnet_t *vnetp = vresp->vnetp;
1446 	vnet_hio_kstats_t *hiokp;
1447 	size_t size;
1448 
1449 	ASSERT(vnetp != NULL);
1450 	size = sizeof (vnet_hio_kstats_t) / sizeof (kstat_named_t);
1451 	ksp = kstat_create(ks_mod, vnetp->instance, ks_name, "net",
1452 	    KSTAT_TYPE_NAMED, size, 0);
1453 	if (ksp == NULL) {
1454 		return (NULL);
1455 	}
1456 
1457 	hiokp = (vnet_hio_kstats_t *)ksp->ks_data;
1458 	kstat_named_init(&hiokp->ipackets,		"ipackets",
1459 	    KSTAT_DATA_ULONG);
1460 	kstat_named_init(&hiokp->ierrors,		"ierrors",
1461 	    KSTAT_DATA_ULONG);
1462 	kstat_named_init(&hiokp->opackets,		"opackets",
1463 	    KSTAT_DATA_ULONG);
1464 	kstat_named_init(&hiokp->oerrors,		"oerrors",
1465 	    KSTAT_DATA_ULONG);
1466 
1467 
1468 	/* MIB II kstat variables */
1469 	kstat_named_init(&hiokp->rbytes,		"rbytes",
1470 	    KSTAT_DATA_ULONG);
1471 	kstat_named_init(&hiokp->obytes,		"obytes",
1472 	    KSTAT_DATA_ULONG);
1473 	kstat_named_init(&hiokp->multircv,		"multircv",
1474 	    KSTAT_DATA_ULONG);
1475 	kstat_named_init(&hiokp->multixmt,		"multixmt",
1476 	    KSTAT_DATA_ULONG);
1477 	kstat_named_init(&hiokp->brdcstrcv,		"brdcstrcv",
1478 	    KSTAT_DATA_ULONG);
1479 	kstat_named_init(&hiokp->brdcstxmt,		"brdcstxmt",
1480 	    KSTAT_DATA_ULONG);
1481 	kstat_named_init(&hiokp->norcvbuf,		"norcvbuf",
1482 	    KSTAT_DATA_ULONG);
1483 	kstat_named_init(&hiokp->noxmtbuf,		"noxmtbuf",
1484 	    KSTAT_DATA_ULONG);
1485 
1486 	ksp->ks_update = vnet_hio_update_kstats;
1487 	ksp->ks_private = (void *)vresp;
1488 	kstat_install(ksp);
1489 	return (ksp);
1490 }
1491 
1492 /*
1493  * Destroy kstats.
1494  */
1495 static void
1496 vnet_hio_destroy_kstats(kstat_t *ksp)
1497 {
1498 	if (ksp != NULL)
1499 		kstat_delete(ksp);
1500 }
1501 
1502 /*
1503  * Update the kstats.
1504  */
1505 static int
1506 vnet_hio_update_kstats(kstat_t *ksp, int rw)
1507 {
1508 	vnet_t *vnetp;
1509 	vnet_res_t *vresp;
1510 	vnet_hio_stats_t statsp;
1511 	vnet_hio_kstats_t *hiokp;
1512 
1513 	vresp = (vnet_res_t *)ksp->ks_private;
1514 	vnetp = vresp->vnetp;
1515 
1516 	bzero(&statsp, sizeof (vnet_hio_stats_t));
1517 
1518 	READ_ENTER(&vnetp->vsw_fp_rw);
1519 	if (vnetp->hio_fp == NULL) {
1520 		/* not using hio resources, just return */
1521 		RW_EXIT(&vnetp->vsw_fp_rw);
1522 		return (0);
1523 	}
1524 	VNET_FDBE_REFHOLD(vnetp->hio_fp);
1525 	RW_EXIT(&vnetp->vsw_fp_rw);
1526 	vnet_hio_get_stats(vnetp->hio_fp, &statsp);
1527 	VNET_FDBE_REFRELE(vnetp->hio_fp);
1528 
1529 	hiokp = (vnet_hio_kstats_t *)ksp->ks_data;
1530 
1531 	if (rw == KSTAT_READ) {
1532 		/* Link Input/Output stats */
1533 		hiokp->ipackets.value.ul	= (uint32_t)statsp.ipackets;
1534 		hiokp->ipackets64.value.ull	= statsp.ipackets;
1535 		hiokp->ierrors.value.ul		= statsp.ierrors;
1536 		hiokp->opackets.value.ul	= (uint32_t)statsp.opackets;
1537 		hiokp->opackets64.value.ull	= statsp.opackets;
1538 		hiokp->oerrors.value.ul		= statsp.oerrors;
1539 
1540 		/* MIB II kstat variables */
1541 		hiokp->rbytes.value.ul		= (uint32_t)statsp.rbytes;
1542 		hiokp->rbytes64.value.ull	= statsp.rbytes;
1543 		hiokp->obytes.value.ul		= (uint32_t)statsp.obytes;
1544 		hiokp->obytes64.value.ull	= statsp.obytes;
1545 		hiokp->multircv.value.ul	= statsp.multircv;
1546 		hiokp->multixmt.value.ul	= statsp.multixmt;
1547 		hiokp->brdcstrcv.value.ul	= statsp.brdcstrcv;
1548 		hiokp->brdcstxmt.value.ul	= statsp.brdcstxmt;
1549 		hiokp->norcvbuf.value.ul	= statsp.norcvbuf;
1550 		hiokp->noxmtbuf.value.ul	= statsp.noxmtbuf;
1551 	} else {
1552 		return (EACCES);
1553 	}
1554 
1555 	return (0);
1556 }
1557 
1558 static void
1559 vnet_hio_get_stats(vnet_res_t *vresp, vnet_hio_stats_t *statsp)
1560 {
1561 	mac_register_t		*macp;
1562 	mac_callbacks_t		*cbp;
1563 	uint64_t		val;
1564 	int			stat;
1565 
1566 	/*
1567 	 * get the specified statistics from the underlying nxge.
1568 	 */
1569 	macp = &vresp->macreg;
1570 	cbp = macp->m_callbacks;
1571 	for (stat = MAC_STAT_MIN; stat < MAC_STAT_OVERFLOWS; stat++) {
1572 		if (cbp->mc_getstat(macp->m_driver, stat, &val) == 0) {
1573 			switch (stat) {
1574 			case MAC_STAT_IPACKETS:
1575 				statsp->ipackets = val;
1576 				break;
1577 
1578 			case MAC_STAT_IERRORS:
1579 				statsp->ierrors = val;
1580 				break;
1581 
1582 			case MAC_STAT_OPACKETS:
1583 				statsp->opackets = val;
1584 				break;
1585 
1586 			case MAC_STAT_OERRORS:
1587 				statsp->oerrors = val;
1588 				break;
1589 
1590 			case MAC_STAT_RBYTES:
1591 				statsp->rbytes = val;
1592 				break;
1593 
1594 			case MAC_STAT_OBYTES:
1595 				statsp->obytes = val;
1596 				break;
1597 
1598 			case MAC_STAT_MULTIRCV:
1599 				statsp->multircv = val;
1600 				break;
1601 
1602 			case MAC_STAT_MULTIXMT:
1603 				statsp->multixmt = val;
1604 				break;
1605 
1606 			case MAC_STAT_BRDCSTRCV:
1607 				statsp->brdcstrcv = val;
1608 				break;
1609 
1610 			case MAC_STAT_BRDCSTXMT:
1611 				statsp->brdcstxmt = val;
1612 				break;
1613 
1614 			case MAC_STAT_NOXMTBUF:
1615 				statsp->noxmtbuf = val;
1616 				break;
1617 
1618 			case MAC_STAT_NORCVBUF:
1619 				statsp->norcvbuf = val;
1620 				break;
1621 
1622 			default:
1623 				/*
1624 				 * parameters not interested.
1625 				 */
1626 				break;
1627 			}
1628 		}
1629 	}
1630 }
1631