xref: /titanic_50/usr/src/uts/sun4v/io/vnet.c (revision 585995d5d19489bf178112c08c8c61ffc049ff6e)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #include <sys/types.h>
28 #include <sys/errno.h>
29 #include <sys/param.h>
30 #include <sys/stream.h>
31 #include <sys/kmem.h>
32 #include <sys/conf.h>
33 #include <sys/devops.h>
34 #include <sys/ksynch.h>
35 #include <sys/stat.h>
36 #include <sys/modctl.h>
37 #include <sys/modhash.h>
38 #include <sys/debug.h>
39 #include <sys/ethernet.h>
40 #include <sys/dlpi.h>
41 #include <net/if.h>
42 #include <sys/mac_provider.h>
43 #include <sys/mac_ether.h>
44 #include <sys/ddi.h>
45 #include <sys/sunddi.h>
46 #include <sys/strsun.h>
47 #include <sys/note.h>
48 #include <sys/atomic.h>
49 #include <sys/vnet.h>
50 #include <sys/vlan.h>
51 #include <sys/vnet_mailbox.h>
52 #include <sys/vnet_common.h>
53 #include <sys/dds.h>
54 #include <sys/strsubr.h>
55 #include <sys/taskq.h>
56 
57 /*
58  * Function prototypes.
59  */
60 
61 /* DDI entrypoints */
62 static int vnetdevinfo(dev_info_t *, ddi_info_cmd_t, void *, void **);
63 static int vnetattach(dev_info_t *, ddi_attach_cmd_t);
64 static int vnetdetach(dev_info_t *, ddi_detach_cmd_t);
65 
66 /* MAC entrypoints  */
67 static int vnet_m_stat(void *, uint_t, uint64_t *);
68 static int vnet_m_start(void *);
69 static void vnet_m_stop(void *);
70 static int vnet_m_promisc(void *, boolean_t);
71 static int vnet_m_multicst(void *, boolean_t, const uint8_t *);
72 static int vnet_m_unicst(void *, const uint8_t *);
73 mblk_t *vnet_m_tx(void *, mblk_t *);
74 
75 /* vnet internal functions */
76 static int vnet_mac_register(vnet_t *);
77 static int vnet_read_mac_address(vnet_t *vnetp);
78 
79 /* Forwarding database (FDB) routines */
80 static void vnet_fdb_create(vnet_t *vnetp);
81 static void vnet_fdb_destroy(vnet_t *vnetp);
82 static vnet_res_t *vnet_fdbe_find(vnet_t *vnetp, struct ether_addr *addrp);
83 static void vnet_fdbe_find_cb(mod_hash_key_t key, mod_hash_val_t val);
84 void vnet_fdbe_add(vnet_t *vnetp, vnet_res_t *vresp);
85 static void vnet_fdbe_del(vnet_t *vnetp, vnet_res_t *vresp);
86 
87 static void vnet_rx_frames_untag(uint16_t pvid, mblk_t **mp);
88 static void vnet_rx(vio_net_handle_t vrh, mblk_t *mp);
89 static void vnet_tx_update(vio_net_handle_t vrh);
90 static void vnet_res_start_task(void *arg);
91 static void vnet_start_resources(vnet_t *vnetp);
92 static void vnet_stop_resources(vnet_t *vnetp);
93 static void vnet_dispatch_res_task(vnet_t *vnetp);
94 static void vnet_res_start_task(void *arg);
95 static void vnet_handle_res_err(vio_net_handle_t vrh, vio_net_err_val_t err);
96 int vnet_mtu_update(vnet_t *vnetp, uint32_t mtu);
97 
98 static kstat_t *vnet_hio_setup_kstats(char *ks_mod, char *ks_name,
99     vnet_res_t *vresp);
100 static int vnet_hio_update_kstats(kstat_t *ksp, int rw);
101 static void vnet_hio_get_stats(vnet_res_t *vresp, vnet_hio_stats_t *statsp);
102 static void vnet_hio_destroy_kstats(kstat_t *ksp);
103 
/* Exported to vnet_dds */
105 int vnet_send_dds_msg(vnet_t *vnetp, void *dmsg);
106 
107 /* Externs that are imported from vnet_gen */
108 extern int vgen_init(void *vnetp, uint64_t regprop, dev_info_t *vnetdip,
109     const uint8_t *macaddr, void **vgenhdl);
110 extern int vgen_uninit(void *arg);
111 extern int vgen_dds_tx(void *arg, void *dmsg);
112 
113 /* Externs that are imported from vnet_dds */
114 extern void vdds_mod_init(void);
115 extern void vdds_mod_fini(void);
116 extern int vdds_init(vnet_t *vnetp);
117 extern void vdds_cleanup(vnet_t *vnetp);
118 extern void vdds_process_dds_msg(vnet_t *vnetp, vio_dds_msg_t *dmsg);
119 extern void vdds_cleanup_hybrid_res(void *arg);
120 
121 #define	DRV_NAME	"vnet"
/*
 * Take a reference on an fdb entry (p must point to a structure with a
 * 32-bit refcnt member). The body is wrapped in do/while(0) so the macro
 * expands to exactly one statement and stays correct inside an unbraced
 * if/else.
 */
#define	VNET_FDBE_REFHOLD(p)						\
do {									\
	atomic_inc_32(&(p)->refcnt);					\
	ASSERT((p)->refcnt != 0);					\
_NOTE(CONSTCOND) } while (0)
127 
/*
 * Drop a reference on an fdb entry; asserts that a reference is actually
 * held. The body is wrapped in do/while(0) so the macro expands to exactly
 * one statement and stays correct inside an unbraced if/else.
 */
#define	VNET_FDBE_REFRELE(p)						\
do {									\
	ASSERT((p)->refcnt != 0);					\
	atomic_dec_32(&(p)->refcnt);					\
_NOTE(CONSTCOND) } while (0)
133 
/*
 * MAC entry points for the vnet device. vnet_mac_register() hands this
 * table to the MAC layer through macp->m_callbacks.
 */
static mac_callbacks_t vnet_m_callbacks = {
	0,		/* NOTE(review): presumably the optional-callback flags */
	vnet_m_stat,		/* get statistics */
	vnet_m_start,		/* enable transmit/receive */
	vnet_m_stop,		/* stop transmit/receive */
	vnet_m_promisc,		/* set/clear promiscuous mode */
	vnet_m_multicst,	/* enable/disable a multicast address */
	vnet_m_unicst,		/* set the unicast address */
	vnet_m_tx,		/* transmit a chain of packets */
	NULL,			/* remaining entry points are not provided */
	NULL,
	NULL
};
147 
148 /*
149  * Linked list of "vnet_t" structures - one per instance.
150  */
151 static vnet_t	*vnet_headp = NULL;
152 static krwlock_t vnet_rw;
153 
154 /* Tunables */
155 uint32_t vnet_ntxds = VNET_NTXDS;	/* power of 2 transmit descriptors */
156 uint32_t vnet_ldcwd_interval = VNET_LDCWD_INTERVAL; /* watchdog freq in msec */
157 uint32_t vnet_ldcwd_txtimeout = VNET_LDCWD_TXTIMEOUT;  /* tx timeout in msec */
158 uint32_t vnet_ldc_mtu = VNET_LDC_MTU;		/* ldc mtu */
159 
160 /*
161  * Set this to non-zero to enable additional internal receive buffer pools
162  * based on the MTU of the device for better performance at the cost of more
163  * memory consumption. This is turned off by default, to use allocb(9F) for
164  * receive buffer allocations of sizes > 2K.
165  */
166 boolean_t vnet_jumbo_rxpools = B_FALSE;
167 
168 /* # of chains in fdb hash table */
169 uint32_t	vnet_fdb_nchains = VNET_NFDB_HASH;
170 
171 /* Internal tunables */
172 uint32_t	vnet_ethermtu = 1500;	/* mtu of the device */
173 
174 /*
175  * Default vlan id. This is only used internally when the "default-vlan-id"
176  * property is not present in the MD device node. Therefore, this should not be
177  * used as a tunable; if this value is changed, the corresponding variable
178  * should be updated to the same value in vsw and also other vnets connected to
179  * the same vsw.
180  */
181 uint16_t	vnet_default_vlan_id = 1;
182 
183 /* delay in usec to wait for all references on a fdb entry to be dropped */
184 uint32_t vnet_fdbe_refcnt_delay = 10;
185 
/*
 * Ethernet address with every octet set to 0xff.
 * NOTE(review): presumably referenced by the IS_BROADCAST() macro used in
 * vnet_m_tx() - confirm against the vnet headers.
 */
static struct ether_addr etherbroadcastaddr = {
	0xff, 0xff, 0xff, 0xff, 0xff, 0xff
};
189 
190 
191 /*
192  * Property names
193  */
194 static char macaddr_propname[] = "local-mac-address";
195 
196 /*
197  * This is the string displayed by modinfo(1m).
198  */
199 static char vnet_ident[] = "vnet driver";
200 extern struct mod_ops mod_driverops;
201 static struct cb_ops cb_vnetops = {
202 	nulldev,		/* cb_open */
203 	nulldev,		/* cb_close */
204 	nodev,			/* cb_strategy */
205 	nodev,			/* cb_print */
206 	nodev,			/* cb_dump */
207 	nodev,			/* cb_read */
208 	nodev,			/* cb_write */
209 	nodev,			/* cb_ioctl */
210 	nodev,			/* cb_devmap */
211 	nodev,			/* cb_mmap */
212 	nodev,			/* cb_segmap */
213 	nochpoll,		/* cb_chpoll */
214 	ddi_prop_op,		/* cb_prop_op */
215 	NULL,			/* cb_stream */
216 	(int)(D_MP)		/* cb_flag */
217 };
218 
219 static struct dev_ops vnetops = {
220 	DEVO_REV,		/* devo_rev */
221 	0,			/* devo_refcnt */
222 	NULL,			/* devo_getinfo */
223 	nulldev,		/* devo_identify */
224 	nulldev,		/* devo_probe */
225 	vnetattach,		/* devo_attach */
226 	vnetdetach,		/* devo_detach */
227 	nodev,			/* devo_reset */
228 	&cb_vnetops,		/* devo_cb_ops */
229 	(struct bus_ops *)NULL,	/* devo_bus_ops */
230 	NULL,			/* devo_power */
231 	ddi_quiesce_not_supported,	/* devo_quiesce */
232 };
233 
234 static struct modldrv modldrv = {
235 	&mod_driverops,		/* Type of module.  This one is a driver */
236 	vnet_ident,		/* ID string */
237 	&vnetops		/* driver specific ops */
238 };
239 
/*
 * Module linkage: the single driver linkage (modldrv) followed by the NULL
 * terminator. Passed to mod_install(), mod_remove() and mod_info() from
 * _init(), _fini() and _info().
 */
static struct modlinkage modlinkage = {
	MODREV_1, (void *)&modldrv, NULL
};
243 
244 #ifdef DEBUG
245 
246 /*
247  * Print debug messages - set to 0xf to enable all msgs
248  */
249 int vnet_dbglevel = 0x8;
250 
251 static void
252 debug_printf(const char *fname, void *arg, const char *fmt, ...)
253 {
254 	char    buf[512];
255 	va_list ap;
256 	vnet_t *vnetp = (vnet_t *)arg;
257 	char    *bufp = buf;
258 
259 	if (vnetp == NULL) {
260 		(void) sprintf(bufp, "%s: ", fname);
261 		bufp += strlen(bufp);
262 	} else {
263 		(void) sprintf(bufp, "vnet%d:%s: ", vnetp->instance, fname);
264 		bufp += strlen(bufp);
265 	}
266 	va_start(ap, fmt);
267 	(void) vsprintf(bufp, fmt, ap);
268 	va_end(ap);
269 	cmn_err(CE_CONT, "%s\n", buf);
270 }
271 
272 #endif
273 
274 /* _init(9E): initialize the loadable module */
275 int
276 _init(void)
277 {
278 	int status;
279 
280 	DBG1(NULL, "enter\n");
281 
282 	mac_init_ops(&vnetops, "vnet");
283 	status = mod_install(&modlinkage);
284 	if (status != 0) {
285 		mac_fini_ops(&vnetops);
286 	}
287 	vdds_mod_init();
288 	DBG1(NULL, "exit(%d)\n", status);
289 	return (status);
290 }
291 
292 /* _fini(9E): prepare the module for unloading. */
293 int
294 _fini(void)
295 {
296 	int status;
297 
298 	DBG1(NULL, "enter\n");
299 
300 	status = mod_remove(&modlinkage);
301 	if (status != 0)
302 		return (status);
303 	mac_fini_ops(&vnetops);
304 	vdds_mod_fini();
305 
306 	DBG1(NULL, "exit(%d)\n", status);
307 	return (status);
308 }
309 
310 /* _info(9E): return information about the loadable module */
311 int
312 _info(struct modinfo *modinfop)
313 {
314 	return (mod_info(&modlinkage, modinfop));
315 }
316 
317 /*
318  * attach(9E): attach a device to the system.
319  * called once for each instance of the device on the system.
320  */
321 static int
322 vnetattach(dev_info_t *dip, ddi_attach_cmd_t cmd)
323 {
324 	vnet_t		*vnetp;
325 	int		status;
326 	int		instance;
327 	uint64_t	reg;
328 	char		qname[TASKQ_NAMELEN];
329 	enum	{ AST_init = 0x0, AST_vnet_alloc = 0x1,
330 		AST_mac_alloc = 0x2, AST_read_macaddr = 0x4,
331 		AST_vgen_init = 0x8, AST_fdbh_alloc = 0x10,
332 		AST_vdds_init = 0x20, AST_taskq_create = 0x40,
333 		AST_vnet_list = 0x80 } attach_state;
334 
335 	attach_state = AST_init;
336 
337 	switch (cmd) {
338 	case DDI_ATTACH:
339 		break;
340 	case DDI_RESUME:
341 	case DDI_PM_RESUME:
342 	default:
343 		goto vnet_attach_fail;
344 	}
345 
346 	instance = ddi_get_instance(dip);
347 	DBG1(NULL, "instance(%d) enter\n", instance);
348 
349 	/* allocate vnet_t and mac_t structures */
350 	vnetp = kmem_zalloc(sizeof (vnet_t), KM_SLEEP);
351 	vnetp->dip = dip;
352 	vnetp->instance = instance;
353 	rw_init(&vnetp->vrwlock, NULL, RW_DRIVER, NULL);
354 	rw_init(&vnetp->vsw_fp_rw, NULL, RW_DRIVER, NULL);
355 	attach_state |= AST_vnet_alloc;
356 
357 	status = vdds_init(vnetp);
358 	if (status != 0) {
359 		goto vnet_attach_fail;
360 	}
361 	attach_state |= AST_vdds_init;
362 
363 	/* setup links to vnet_t from both devinfo and mac_t */
364 	ddi_set_driver_private(dip, (caddr_t)vnetp);
365 
366 	/* read the mac address */
367 	status = vnet_read_mac_address(vnetp);
368 	if (status != DDI_SUCCESS) {
369 		goto vnet_attach_fail;
370 	}
371 	attach_state |= AST_read_macaddr;
372 
373 	reg = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
374 	    DDI_PROP_DONTPASS, "reg", -1);
375 	if (reg == -1) {
376 		goto vnet_attach_fail;
377 	}
378 	vnetp->reg = reg;
379 
380 	vnet_fdb_create(vnetp);
381 	attach_state |= AST_fdbh_alloc;
382 
383 	(void) snprintf(qname, TASKQ_NAMELEN, "vnet_taskq%d", instance);
384 	if ((vnetp->taskqp = ddi_taskq_create(dip, qname, 1,
385 	    TASKQ_DEFAULTPRI, 0)) == NULL) {
386 		cmn_err(CE_WARN, "!vnet%d: Unable to create task queue",
387 		    instance);
388 		goto vnet_attach_fail;
389 	}
390 	attach_state |= AST_taskq_create;
391 
392 	/* add to the list of vnet devices */
393 	WRITE_ENTER(&vnet_rw);
394 	vnetp->nextp = vnet_headp;
395 	vnet_headp = vnetp;
396 	RW_EXIT(&vnet_rw);
397 
398 	attach_state |= AST_vnet_list;
399 
400 	/*
401 	 * Initialize the generic vnet plugin which provides
402 	 * communication via sun4v LDC (logical domain channel) based
403 	 * resources. It will register the LDC resources as and when
404 	 * they become available.
405 	 */
406 	status = vgen_init(vnetp, reg, vnetp->dip,
407 	    (uint8_t *)vnetp->curr_macaddr, &vnetp->vgenhdl);
408 	if (status != DDI_SUCCESS) {
409 		DERR(vnetp, "vgen_init() failed\n");
410 		goto vnet_attach_fail;
411 	}
412 	attach_state |= AST_vgen_init;
413 
414 	/* register with MAC layer */
415 	status = vnet_mac_register(vnetp);
416 	if (status != DDI_SUCCESS) {
417 		goto vnet_attach_fail;
418 	}
419 
420 	DBG1(NULL, "instance(%d) exit\n", instance);
421 	return (DDI_SUCCESS);
422 
423 vnet_attach_fail:
424 
425 	if (attach_state & AST_vnet_list) {
426 		vnet_t		**vnetpp;
427 		/* unlink from instance(vnet_t) list */
428 		WRITE_ENTER(&vnet_rw);
429 		for (vnetpp = &vnet_headp; *vnetpp;
430 		    vnetpp = &(*vnetpp)->nextp) {
431 			if (*vnetpp == vnetp) {
432 				*vnetpp = vnetp->nextp;
433 				break;
434 			}
435 		}
436 		RW_EXIT(&vnet_rw);
437 	}
438 
439 	if (attach_state & AST_vdds_init) {
440 		vdds_cleanup(vnetp);
441 	}
442 	if (attach_state & AST_taskq_create) {
443 		ddi_taskq_destroy(vnetp->taskqp);
444 	}
445 	if (attach_state & AST_fdbh_alloc) {
446 		vnet_fdb_destroy(vnetp);
447 	}
448 	if (attach_state & AST_vgen_init) {
449 		(void) vgen_uninit(vnetp->vgenhdl);
450 	}
451 	if (attach_state & AST_vnet_alloc) {
452 		rw_destroy(&vnetp->vrwlock);
453 		rw_destroy(&vnetp->vsw_fp_rw);
454 		KMEM_FREE(vnetp);
455 	}
456 	return (DDI_FAILURE);
457 }
458 
459 /*
460  * detach(9E): detach a device from the system.
461  */
462 static int
463 vnetdetach(dev_info_t *dip, ddi_detach_cmd_t cmd)
464 {
465 	vnet_t		*vnetp;
466 	vnet_t		**vnetpp;
467 	int		instance;
468 	int		rv;
469 
470 	instance = ddi_get_instance(dip);
471 	DBG1(NULL, "instance(%d) enter\n", instance);
472 
473 	vnetp = ddi_get_driver_private(dip);
474 	if (vnetp == NULL) {
475 		goto vnet_detach_fail;
476 	}
477 
478 	switch (cmd) {
479 	case DDI_DETACH:
480 		break;
481 	case DDI_SUSPEND:
482 	case DDI_PM_SUSPEND:
483 	default:
484 		goto vnet_detach_fail;
485 	}
486 
487 	(void) vdds_cleanup(vnetp);
488 	rv = vgen_uninit(vnetp->vgenhdl);
489 	if (rv != DDI_SUCCESS) {
490 		goto vnet_detach_fail;
491 	}
492 
493 	/*
494 	 * Unregister from the MAC subsystem.  This can fail, in
495 	 * particular if there are DLPI style-2 streams still open -
496 	 * in which case we just return failure.
497 	 */
498 	if (mac_unregister(vnetp->mh) != 0)
499 		goto vnet_detach_fail;
500 
501 	/* unlink from instance(vnet_t) list */
502 	WRITE_ENTER(&vnet_rw);
503 	for (vnetpp = &vnet_headp; *vnetpp; vnetpp = &(*vnetpp)->nextp) {
504 		if (*vnetpp == vnetp) {
505 			*vnetpp = vnetp->nextp;
506 			break;
507 		}
508 	}
509 	RW_EXIT(&vnet_rw);
510 
511 	ddi_taskq_destroy(vnetp->taskqp);
512 	/* destroy fdb */
513 	vnet_fdb_destroy(vnetp);
514 
515 	rw_destroy(&vnetp->vrwlock);
516 	rw_destroy(&vnetp->vsw_fp_rw);
517 	KMEM_FREE(vnetp);
518 
519 	return (DDI_SUCCESS);
520 
521 vnet_detach_fail:
522 	return (DDI_FAILURE);
523 }
524 
525 /* enable the device for transmit/receive */
526 static int
527 vnet_m_start(void *arg)
528 {
529 	vnet_t		*vnetp = arg;
530 
531 	DBG1(vnetp, "enter\n");
532 
533 	WRITE_ENTER(&vnetp->vrwlock);
534 	vnetp->flags |= VNET_STARTED;
535 	vnet_start_resources(vnetp);
536 	RW_EXIT(&vnetp->vrwlock);
537 
538 	DBG1(vnetp, "exit\n");
539 	return (VNET_SUCCESS);
540 
541 }
542 
543 /* stop transmit/receive for the device */
544 static void
545 vnet_m_stop(void *arg)
546 {
547 	vnet_t		*vnetp = arg;
548 
549 	DBG1(vnetp, "enter\n");
550 
551 	WRITE_ENTER(&vnetp->vrwlock);
552 	if (vnetp->flags & VNET_STARTED) {
553 		vnet_stop_resources(vnetp);
554 		vnetp->flags &= ~VNET_STARTED;
555 	}
556 	RW_EXIT(&vnetp->vrwlock);
557 
558 	DBG1(vnetp, "exit\n");
559 }
560 
561 /* set the unicast mac address of the device */
562 static int
563 vnet_m_unicst(void *arg, const uint8_t *macaddr)
564 {
565 	_NOTE(ARGUNUSED(macaddr))
566 
567 	vnet_t *vnetp = arg;
568 
569 	DBG1(vnetp, "enter\n");
570 	/*
571 	 * NOTE: setting mac address dynamically is not supported.
572 	 */
573 	DBG1(vnetp, "exit\n");
574 
575 	return (VNET_FAILURE);
576 }
577 
578 /* enable/disable a multicast address */
579 static int
580 vnet_m_multicst(void *arg, boolean_t add, const uint8_t *mca)
581 {
582 	_NOTE(ARGUNUSED(add, mca))
583 
584 	vnet_t *vnetp = arg;
585 	vnet_res_t	*vresp;
586 	mac_register_t	*macp;
587 	mac_callbacks_t	*cbp;
588 	int rv = VNET_SUCCESS;
589 
590 	DBG1(vnetp, "enter\n");
591 
592 	READ_ENTER(&vnetp->vrwlock);
593 	for (vresp = vnetp->vres_list; vresp != NULL; vresp = vresp->nextp) {
594 		if (vresp->type == VIO_NET_RES_LDC_SERVICE) {
595 			macp = &vresp->macreg;
596 			cbp = macp->m_callbacks;
597 			rv = cbp->mc_multicst(macp->m_driver, add, mca);
598 		}
599 	}
600 	RW_EXIT(&vnetp->vrwlock);
601 
602 	DBG1(vnetp, "exit(%d)\n", rv);
603 	return (rv);
604 }
605 
606 /* set or clear promiscuous mode on the device */
607 static int
608 vnet_m_promisc(void *arg, boolean_t on)
609 {
610 	_NOTE(ARGUNUSED(on))
611 
612 	vnet_t *vnetp = arg;
613 	DBG1(vnetp, "enter\n");
614 	/*
615 	 * NOTE: setting promiscuous mode is not supported, just return success.
616 	 */
617 	DBG1(vnetp, "exit\n");
618 	return (VNET_SUCCESS);
619 }
620 
621 /*
622  * Transmit a chain of packets. This function provides switching functionality
623  * based on the destination mac address to reach other guests (within ldoms) or
624  * external hosts.
625  */
626 mblk_t *
627 vnet_m_tx(void *arg, mblk_t *mp)
628 {
629 	vnet_t			*vnetp;
630 	vnet_res_t		*vresp;
631 	mblk_t			*next;
632 	mblk_t			*resid_mp;
633 	mac_register_t		*macp;
634 	struct ether_header	*ehp;
635 	boolean_t		is_unicast;
636 	boolean_t		is_pvid;	/* non-default pvid ? */
637 	boolean_t		hres;		/* Hybrid resource ? */
638 
639 	vnetp = (vnet_t *)arg;
640 	DBG1(vnetp, "enter\n");
641 	ASSERT(mp != NULL);
642 
643 	is_pvid = (vnetp->pvid != vnetp->default_vlan_id) ? B_TRUE : B_FALSE;
644 
645 	while (mp != NULL) {
646 
647 		next = mp->b_next;
648 		mp->b_next = NULL;
649 
650 		/*
651 		 * Find fdb entry for the destination
652 		 * and hold a reference to it.
653 		 */
654 		ehp = (struct ether_header *)mp->b_rptr;
655 		vresp = vnet_fdbe_find(vnetp, &ehp->ether_dhost);
656 		if (vresp != NULL) {
657 
658 			/*
659 			 * Destination found in FDB.
660 			 * The destination is a vnet device within ldoms
661 			 * and directly reachable, invoke the tx function
662 			 * in the fdb entry.
663 			 */
664 			macp = &vresp->macreg;
665 			resid_mp = macp->m_callbacks->mc_tx(macp->m_driver, mp);
666 
667 			/* tx done; now release ref on fdb entry */
668 			VNET_FDBE_REFRELE(vresp);
669 
670 			if (resid_mp != NULL) {
671 				/* m_tx failed */
672 				mp->b_next = next;
673 				break;
674 			}
675 		} else {
676 			is_unicast = !(IS_BROADCAST(ehp) ||
677 			    (IS_MULTICAST(ehp)));
678 			/*
679 			 * Destination is not in FDB.
680 			 * If the destination is broadcast or multicast,
681 			 * then forward the packet to vswitch.
682 			 * If a Hybrid resource avilable, then send the
683 			 * unicast packet via hybrid resource, otherwise
684 			 * forward it to vswitch.
685 			 */
686 			READ_ENTER(&vnetp->vsw_fp_rw);
687 
688 			if ((is_unicast) && (vnetp->hio_fp != NULL)) {
689 				vresp = vnetp->hio_fp;
690 				hres = B_TRUE;
691 			} else {
692 				vresp = vnetp->vsw_fp;
693 				hres = B_FALSE;
694 			}
695 			if (vresp == NULL) {
696 				/*
697 				 * no fdb entry to vsw? drop the packet.
698 				 */
699 				RW_EXIT(&vnetp->vsw_fp_rw);
700 				freemsg(mp);
701 				mp = next;
702 				continue;
703 			}
704 
705 			/* ref hold the fdb entry to vsw */
706 			VNET_FDBE_REFHOLD(vresp);
707 
708 			RW_EXIT(&vnetp->vsw_fp_rw);
709 
710 			/*
711 			 * In the case of a hybrid resource we need to insert
712 			 * the tag for the pvid case here; unlike packets that
713 			 * are destined to a vnet/vsw in which case the vgen
714 			 * layer does the tagging before sending it over ldc.
715 			 */
716 			if (hres == B_TRUE) {
717 				/*
718 				 * Determine if the frame being transmitted
719 				 * over the hybrid resource is untagged. If so,
720 				 * insert the tag before transmitting.
721 				 */
722 				if (is_pvid == B_TRUE &&
723 				    ehp->ether_type != htons(ETHERTYPE_VLAN)) {
724 
725 					mp = vnet_vlan_insert_tag(mp,
726 					    vnetp->pvid);
727 					if (mp == NULL) {
728 						VNET_FDBE_REFRELE(vresp);
729 						mp = next;
730 						continue;
731 					}
732 
733 				}
734 			}
735 
736 			macp = &vresp->macreg;
737 			resid_mp = macp->m_callbacks->mc_tx(macp->m_driver, mp);
738 
739 			/* tx done; now release ref on fdb entry */
740 			VNET_FDBE_REFRELE(vresp);
741 
742 			if (resid_mp != NULL) {
743 				/* m_tx failed */
744 				mp->b_next = next;
745 				break;
746 			}
747 		}
748 
749 		mp = next;
750 	}
751 
752 	DBG1(vnetp, "exit\n");
753 	return (mp);
754 }
755 
756 /* get statistics from the device */
757 int
758 vnet_m_stat(void *arg, uint_t stat, uint64_t *val)
759 {
760 	vnet_t *vnetp = arg;
761 	vnet_res_t	*vresp;
762 	mac_register_t	*macp;
763 	mac_callbacks_t	*cbp;
764 	uint64_t val_total = 0;
765 
766 	DBG1(vnetp, "enter\n");
767 
768 	/*
769 	 * get the specified statistic from each transport and return the
770 	 * aggregate val.  This obviously only works for counters.
771 	 */
772 	if ((IS_MAC_STAT(stat) && !MAC_STAT_ISACOUNTER(stat)) ||
773 	    (IS_MACTYPE_STAT(stat) && !ETHER_STAT_ISACOUNTER(stat))) {
774 		return (ENOTSUP);
775 	}
776 
777 	READ_ENTER(&vnetp->vrwlock);
778 	for (vresp = vnetp->vres_list; vresp != NULL; vresp = vresp->nextp) {
779 		macp = &vresp->macreg;
780 		cbp = macp->m_callbacks;
781 		if (cbp->mc_getstat(macp->m_driver, stat, val) == 0)
782 			val_total += *val;
783 	}
784 	RW_EXIT(&vnetp->vrwlock);
785 
786 	*val = val_total;
787 
788 	DBG1(vnetp, "exit\n");
789 	return (0);
790 }
791 
792 /* wrapper function for mac_register() */
793 static int
794 vnet_mac_register(vnet_t *vnetp)
795 {
796 	mac_register_t	*macp;
797 	int		err;
798 
799 	if ((macp = mac_alloc(MAC_VERSION)) == NULL)
800 		return (DDI_FAILURE);
801 	macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
802 	macp->m_driver = vnetp;
803 	macp->m_dip = vnetp->dip;
804 	macp->m_src_addr = vnetp->curr_macaddr;
805 	macp->m_callbacks = &vnet_m_callbacks;
806 	macp->m_min_sdu = 0;
807 	macp->m_max_sdu = vnetp->mtu;
808 	macp->m_margin = VLAN_TAGSZ;
809 
810 	/*
811 	 * Finally, we're ready to register ourselves with the MAC layer
812 	 * interface; if this succeeds, we're all ready to start()
813 	 */
814 	err = mac_register(macp, &vnetp->mh);
815 	mac_free(macp);
816 	return (err == 0 ? DDI_SUCCESS : DDI_FAILURE);
817 }
818 
819 /* read the mac address of the device */
820 static int
821 vnet_read_mac_address(vnet_t *vnetp)
822 {
823 	uchar_t 	*macaddr;
824 	uint32_t 	size;
825 	int 		rv;
826 
827 	rv = ddi_prop_lookup_byte_array(DDI_DEV_T_ANY, vnetp->dip,
828 	    DDI_PROP_DONTPASS, macaddr_propname, &macaddr, &size);
829 	if ((rv != DDI_PROP_SUCCESS) || (size != ETHERADDRL)) {
830 		DWARN(vnetp, "prop_lookup failed(%s) err(%d)\n",
831 		    macaddr_propname, rv);
832 		return (DDI_FAILURE);
833 	}
834 	bcopy(macaddr, (caddr_t)vnetp->vendor_addr, ETHERADDRL);
835 	bcopy(macaddr, (caddr_t)vnetp->curr_macaddr, ETHERADDRL);
836 	ddi_prop_free(macaddr);
837 
838 	return (DDI_SUCCESS);
839 }
840 
841 static void
842 vnet_fdb_create(vnet_t *vnetp)
843 {
844 	char		hashname[MAXNAMELEN];
845 
846 	(void) snprintf(hashname, MAXNAMELEN, "vnet%d-fdbhash",
847 	    vnetp->instance);
848 	vnetp->fdb_nchains = vnet_fdb_nchains;
849 	vnetp->fdb_hashp = mod_hash_create_ptrhash(hashname, vnetp->fdb_nchains,
850 	    mod_hash_null_valdtor, sizeof (void *));
851 }
852 
853 static void
854 vnet_fdb_destroy(vnet_t *vnetp)
855 {
856 	/* destroy fdb-hash-table */
857 	if (vnetp->fdb_hashp != NULL) {
858 		mod_hash_destroy_hash(vnetp->fdb_hashp);
859 		vnetp->fdb_hashp = NULL;
860 		vnetp->fdb_nchains = 0;
861 	}
862 }
863 
864 /*
865  * Add an entry into the fdb.
866  */
867 void
868 vnet_fdbe_add(vnet_t *vnetp, vnet_res_t *vresp)
869 {
870 	uint64_t	addr = 0;
871 	int		rv;
872 
873 	KEY_HASH(addr, vresp->rem_macaddr);
874 
875 	/*
876 	 * If the entry being added corresponds to LDC_SERVICE resource,
877 	 * that is, vswitch connection, it is added to the hash and also
878 	 * the entry is cached, an additional reference count reflects
879 	 * this. The HYBRID resource is not added to the hash, but only
880 	 * cached, as it is only used for sending out packets for unknown
881 	 * unicast destinations.
882 	 */
883 	(vresp->type == VIO_NET_RES_LDC_SERVICE) ?
884 	    (vresp->refcnt = 1) : (vresp->refcnt = 0);
885 
886 	/*
887 	 * Note: duplicate keys will be rejected by mod_hash.
888 	 */
889 	if (vresp->type != VIO_NET_RES_HYBRID) {
890 		rv = mod_hash_insert(vnetp->fdb_hashp, (mod_hash_key_t)addr,
891 		    (mod_hash_val_t)vresp);
892 		if (rv != 0) {
893 			DWARN(vnetp, "Duplicate macaddr key(%lx)\n", addr);
894 			return;
895 		}
896 	}
897 
898 	if (vresp->type == VIO_NET_RES_LDC_SERVICE) {
899 		/* Cache the fdb entry to vsw-port */
900 		WRITE_ENTER(&vnetp->vsw_fp_rw);
901 		if (vnetp->vsw_fp == NULL)
902 			vnetp->vsw_fp = vresp;
903 		RW_EXIT(&vnetp->vsw_fp_rw);
904 	} else if (vresp->type == VIO_NET_RES_HYBRID) {
905 		/* Cache the fdb entry to hybrid resource */
906 		WRITE_ENTER(&vnetp->vsw_fp_rw);
907 		if (vnetp->hio_fp == NULL)
908 			vnetp->hio_fp = vresp;
909 		RW_EXIT(&vnetp->vsw_fp_rw);
910 	}
911 }
912 
913 /*
914  * Remove an entry from fdb.
915  */
916 static void
917 vnet_fdbe_del(vnet_t *vnetp, vnet_res_t *vresp)
918 {
919 	uint64_t	addr = 0;
920 	int		rv;
921 	uint32_t	refcnt;
922 	vnet_res_t	*tmp;
923 
924 	KEY_HASH(addr, vresp->rem_macaddr);
925 
926 	/*
927 	 * Remove the entry from fdb hash table.
928 	 * This prevents further references to this fdb entry.
929 	 */
930 	if (vresp->type != VIO_NET_RES_HYBRID) {
931 		rv = mod_hash_remove(vnetp->fdb_hashp, (mod_hash_key_t)addr,
932 		    (mod_hash_val_t *)&tmp);
933 		if (rv != 0) {
934 			/*
935 			 * As the resources are added to the hash only
936 			 * after they are started, this can occur if
937 			 * a resource unregisters before it is ever started.
938 			 */
939 			return;
940 		}
941 	}
942 
943 	if (vresp->type == VIO_NET_RES_LDC_SERVICE) {
944 		WRITE_ENTER(&vnetp->vsw_fp_rw);
945 
946 		ASSERT(tmp == vnetp->vsw_fp);
947 		vnetp->vsw_fp = NULL;
948 
949 		RW_EXIT(&vnetp->vsw_fp_rw);
950 	} else if (vresp->type == VIO_NET_RES_HYBRID) {
951 		WRITE_ENTER(&vnetp->vsw_fp_rw);
952 
953 		vnetp->hio_fp = NULL;
954 
955 		RW_EXIT(&vnetp->vsw_fp_rw);
956 	}
957 
958 	/*
959 	 * If there are threads already ref holding before the entry was
960 	 * removed from hash table, then wait for ref count to drop to zero.
961 	 */
962 	(vresp->type == VIO_NET_RES_LDC_SERVICE) ?
963 	    (refcnt = 1) : (refcnt = 0);
964 	while (vresp->refcnt > refcnt) {
965 		delay(drv_usectohz(vnet_fdbe_refcnt_delay));
966 	}
967 }
968 
969 /*
970  * Search fdb for a given mac address. If an entry is found, hold
971  * a reference to it and return the entry; else returns NULL.
972  */
973 static vnet_res_t *
974 vnet_fdbe_find(vnet_t *vnetp, struct ether_addr *addrp)
975 {
976 	uint64_t	key = 0;
977 	vnet_res_t	*vresp;
978 	int		rv;
979 
980 	KEY_HASH(key, addrp->ether_addr_octet);
981 
982 	rv = mod_hash_find_cb(vnetp->fdb_hashp, (mod_hash_key_t)key,
983 	    (mod_hash_val_t *)&vresp, vnet_fdbe_find_cb);
984 
985 	if (rv != 0)
986 		return (NULL);
987 
988 	return (vresp);
989 }
990 
991 /*
992  * Callback function provided to mod_hash_find_cb(). After finding the fdb
993  * entry corresponding to the key (macaddr), this callback will be invoked by
994  * mod_hash_find_cb() to atomically increment the reference count on the fdb
995  * entry before returning the found entry.
996  */
997 static void
998 vnet_fdbe_find_cb(mod_hash_key_t key, mod_hash_val_t val)
999 {
1000 	_NOTE(ARGUNUSED(key))
1001 	VNET_FDBE_REFHOLD((vnet_res_t *)val);
1002 }
1003 
1004 /*
1005  * Frames received that are tagged with the pvid of the vnet device must be
1006  * untagged before sending up the stack. This function walks the chain of rx
1007  * frames, untags any such frames and returns the updated chain.
1008  *
1009  * Arguments:
1010  *    pvid:  pvid of the vnet device for which packets are being received
 *    mp:    in:  head of pkt chain to be validated and untagged
 *           out: head of the updated chain of packets (NULL if no
 *                frame is left in the chain)
 *
 * Returns:
 *    Nothing; the updated chain is handed back through mp.
1015  */
1016 static void
1017 vnet_rx_frames_untag(uint16_t pvid, mblk_t **mp)
1018 {
1019 	struct ether_vlan_header	*evhp;
1020 	mblk_t				*bp;
1021 	mblk_t				*bpt;
1022 	mblk_t				*bph;
1023 	mblk_t				*bpn;
1024 
1025 	bpn = bph = bpt = NULL;
1026 
1027 	for (bp = *mp; bp != NULL; bp = bpn) {
1028 
1029 		bpn = bp->b_next;
1030 		bp->b_next = bp->b_prev = NULL;
1031 
1032 		evhp = (struct ether_vlan_header *)bp->b_rptr;
1033 
1034 		if (ntohs(evhp->ether_tpid) == ETHERTYPE_VLAN &&
1035 		    VLAN_ID(ntohs(evhp->ether_tci)) == pvid) {
1036 
1037 			bp = vnet_vlan_remove_tag(bp);
1038 			if (bp == NULL) {
1039 				continue;
1040 			}
1041 
1042 		}
1043 
1044 		/* build a chain of processed packets */
1045 		if (bph == NULL) {
1046 			bph = bpt = bp;
1047 		} else {
1048 			bpt->b_next = bp;
1049 			bpt = bp;
1050 		}
1051 
1052 	}
1053 
1054 	*mp = bph;
1055 }
1056 
1057 static void
1058 vnet_rx(vio_net_handle_t vrh, mblk_t *mp)
1059 {
1060 	vnet_res_t	*vresp = (vnet_res_t *)vrh;
1061 	vnet_t		*vnetp = vresp->vnetp;
1062 
1063 	if ((vnetp == NULL) || (vnetp->mh == 0)) {
1064 		freemsgchain(mp);
1065 		return;
1066 	}
1067 
1068 	/*
1069 	 * Packets received over a hybrid resource need additional processing
1070 	 * to remove the tag, for the pvid case. The underlying resource is
1071 	 * not aware of the vnet's pvid and thus packets are received with the
1072 	 * vlan tag in the header; unlike packets that are received over a ldc
1073 	 * channel in which case the peer vnet/vsw would have already removed
1074 	 * the tag.
1075 	 */
1076 	if (vresp->type == VIO_NET_RES_HYBRID &&
1077 	    vnetp->pvid != vnetp->default_vlan_id) {
1078 
1079 		vnet_rx_frames_untag(vnetp->pvid, &mp);
1080 		if (mp == NULL) {
1081 			return;
1082 		}
1083 	}
1084 
1085 	mac_rx(vnetp->mh, NULL, mp);
1086 }
1087 
1088 void
1089 vnet_tx_update(vio_net_handle_t vrh)
1090 {
1091 	vnet_res_t *vresp = (vnet_res_t *)vrh;
1092 	vnet_t *vnetp = vresp->vnetp;
1093 
1094 	if ((vnetp != NULL) && (vnetp->mh != NULL)) {
1095 		mac_tx_update(vnetp->mh);
1096 	}
1097 }
1098 
1099 /*
1100  * Update the new mtu of vnet into the mac layer. First check if the device has
1101  * been plumbed and if so fail the mtu update. Returns 0 on success.
1102  */
1103 int
1104 vnet_mtu_update(vnet_t *vnetp, uint32_t mtu)
1105 {
1106 	int	rv;
1107 
1108 	if (vnetp == NULL || vnetp->mh == NULL) {
1109 		return (EINVAL);
1110 	}
1111 
1112 	WRITE_ENTER(&vnetp->vrwlock);
1113 
1114 	if (vnetp->flags & VNET_STARTED) {
1115 		RW_EXIT(&vnetp->vrwlock);
1116 		cmn_err(CE_NOTE, "!vnet%d: Unable to process mtu "
1117 		    "update as the device is plumbed\n",
1118 		    vnetp->instance);
1119 		return (EBUSY);
1120 	}
1121 
1122 	/* update mtu in the mac layer */
1123 	rv = mac_maxsdu_update(vnetp->mh, mtu);
1124 	if (rv != 0) {
1125 		RW_EXIT(&vnetp->vrwlock);
1126 		cmn_err(CE_NOTE,
1127 		    "!vnet%d: Unable to update mtu with mac layer\n",
1128 		    vnetp->instance);
1129 		return (EIO);
1130 	}
1131 
1132 	vnetp->mtu = mtu;
1133 
1134 	RW_EXIT(&vnetp->vrwlock);
1135 
1136 	return (0);
1137 }
1138 
1139 /*
1140  * vio_net_resource_reg -- An interface called to register a resource
1141  *	with vnet.
1142  *	macp -- a GLDv3 mac_register that has all the details of
1143  *		a resource and its callbacks etc.
1144  *	type -- resource type.
1145  *	local_macaddr -- resource's MAC address. This is used to
1146  *			 associate a resource with a corresponding vnet.
1147  *	remote_macaddr -- remote side MAC address. This is ignored for
1148  *			  the Hybrid resources.
1149  *	vhp -- A handle returned to the caller.
1150  *	vcb -- A set of callbacks provided to the callers.
1151  */
1152 int vio_net_resource_reg(mac_register_t *macp, vio_net_res_type_t type,
1153     ether_addr_t local_macaddr, ether_addr_t rem_macaddr, vio_net_handle_t *vhp,
1154     vio_net_callbacks_t *vcb)
1155 {
1156 	vnet_t	*vnetp;
1157 	vnet_res_t *vresp;
1158 
1159 	vresp = kmem_zalloc(sizeof (vnet_res_t), KM_SLEEP);
1160 	ether_copy(local_macaddr, vresp->local_macaddr);
1161 	ether_copy(rem_macaddr, vresp->rem_macaddr);
1162 	vresp->type = type;
1163 	bcopy(macp, &vresp->macreg, sizeof (mac_register_t));
1164 
1165 	DBG1(NULL, "Resource Registerig type=0%X\n", type);
1166 
1167 	READ_ENTER(&vnet_rw);
1168 	vnetp = vnet_headp;
1169 	while (vnetp != NULL) {
1170 		if (VNET_MATCH_RES(vresp, vnetp)) {
1171 			vresp->vnetp = vnetp;
1172 
1173 			/* Setup kstats for hio resource */
1174 			if (vresp->type == VIO_NET_RES_HYBRID) {
1175 				vresp->ksp = vnet_hio_setup_kstats(DRV_NAME,
1176 				    "hio", vresp);
1177 				if (vresp->ksp == NULL) {
1178 					cmn_err(CE_NOTE, "!vnet%d: Cannot "
1179 					    "create kstats for hio resource",
1180 					    vnetp->instance);
1181 				}
1182 			}
1183 
1184 			WRITE_ENTER(&vnetp->vrwlock);
1185 			vresp->nextp = vnetp->vres_list;
1186 			vnetp->vres_list = vresp;
1187 			RW_EXIT(&vnetp->vrwlock);
1188 			break;
1189 		}
1190 		vnetp = vnetp->nextp;
1191 	}
1192 	RW_EXIT(&vnet_rw);
1193 	if (vresp->vnetp == NULL) {
1194 		DWARN(NULL, "No vnet instance");
1195 		kmem_free(vresp, sizeof (vnet_res_t));
1196 		return (ENXIO);
1197 	}
1198 
1199 	*vhp = vresp;
1200 	vcb->vio_net_rx_cb = vnet_rx;
1201 	vcb->vio_net_tx_update = vnet_tx_update;
1202 	vcb->vio_net_report_err = vnet_handle_res_err;
1203 
1204 	/* Dispatch a task to start resources */
1205 	vnet_dispatch_res_task(vnetp);
1206 	return (0);
1207 }
1208 
1209 /*
1210  * vio_net_resource_unreg -- An interface to unregister a resource.
1211  */
1212 void
1213 vio_net_resource_unreg(vio_net_handle_t vhp)
1214 {
1215 	vnet_res_t *vresp = (vnet_res_t *)vhp;
1216 	vnet_t *vnetp = vresp->vnetp;
1217 	vnet_res_t *vrp;
1218 	kstat_t *ksp = NULL;
1219 
1220 	DBG1(NULL, "Resource Registerig hdl=0x%p", vhp);
1221 
1222 	ASSERT(vnetp != NULL);
1223 	vnet_fdbe_del(vnetp, vresp);
1224 
1225 	WRITE_ENTER(&vnetp->vrwlock);
1226 	if (vresp == vnetp->vres_list) {
1227 		vnetp->vres_list = vresp->nextp;
1228 	} else {
1229 		vrp = vnetp->vres_list;
1230 		while (vrp->nextp != NULL) {
1231 			if (vrp->nextp == vresp) {
1232 				vrp->nextp = vresp->nextp;
1233 				break;
1234 			}
1235 			vrp = vrp->nextp;
1236 		}
1237 	}
1238 
1239 	ksp = vresp->ksp;
1240 	vresp->ksp = NULL;
1241 
1242 	vresp->vnetp = NULL;
1243 	vresp->nextp = NULL;
1244 	RW_EXIT(&vnetp->vrwlock);
1245 	vnet_hio_destroy_kstats(ksp);
1246 	KMEM_FREE(vresp);
1247 }
1248 
1249 /*
1250  * vnet_dds_rx -- an interface called by vgen to DDS messages.
1251  */
1252 void
1253 vnet_dds_rx(void *arg, void *dmsg)
1254 {
1255 	vnet_t *vnetp = arg;
1256 	vdds_process_dds_msg(vnetp, dmsg);
1257 }
1258 
1259 /*
1260  * vnet_send_dds_msg -- An interface provided to DDS to send
1261  *	DDS messages. This simply sends meessages via vgen.
1262  */
1263 int
1264 vnet_send_dds_msg(vnet_t *vnetp, void *dmsg)
1265 {
1266 	int rv;
1267 
1268 	if (vnetp->vgenhdl != NULL) {
1269 		rv = vgen_dds_tx(vnetp->vgenhdl, dmsg);
1270 	}
1271 	return (rv);
1272 }
1273 
1274 /*
1275  * vnet_handle_res_err -- A callback function called by a resource
1276  *	to report an error. For example, vgen can call to report
1277  *	an LDC down/reset event. This will trigger cleanup of associated
1278  *	Hybrid resource.
1279  */
1280 /* ARGSUSED */
1281 static void
1282 vnet_handle_res_err(vio_net_handle_t vrh, vio_net_err_val_t err)
1283 {
1284 	vnet_res_t *vresp = (vnet_res_t *)vrh;
1285 	vnet_t *vnetp = vresp->vnetp;
1286 	int rv;
1287 
1288 	if (vnetp == NULL) {
1289 		return;
1290 	}
1291 	if ((vresp->type != VIO_NET_RES_LDC_SERVICE) &&
1292 	    (vresp->type != VIO_NET_RES_HYBRID)) {
1293 		return;
1294 	}
1295 	rv = ddi_taskq_dispatch(vnetp->taskqp, vdds_cleanup_hybrid_res,
1296 	    vnetp, DDI_NOSLEEP);
1297 	if (rv != DDI_SUCCESS) {
1298 		cmn_err(CE_WARN,
1299 		    "vnet%d:Failed to dispatch task to cleanup hybrid resource",
1300 		    vnetp->instance);
1301 	}
1302 }
1303 
1304 /*
1305  * vnet_dispatch_res_task -- A function to dispatch tasks start resources.
1306  */
1307 static void
1308 vnet_dispatch_res_task(vnet_t *vnetp)
1309 {
1310 	int rv;
1311 
1312 	WRITE_ENTER(&vnetp->vrwlock);
1313 	if (vnetp->flags & VNET_STARTED) {
1314 		rv = ddi_taskq_dispatch(vnetp->taskqp, vnet_res_start_task,
1315 		    vnetp, DDI_NOSLEEP);
1316 		if (rv != DDI_SUCCESS) {
1317 			cmn_err(CE_WARN,
1318 			    "vnet%d:Can't dispatch start resource task",
1319 			    vnetp->instance);
1320 		}
1321 	}
1322 	RW_EXIT(&vnetp->vrwlock);
1323 }
1324 
1325 /*
1326  * vnet_res_start_task -- A taskq callback function that starts a resource.
1327  */
1328 static void
1329 vnet_res_start_task(void *arg)
1330 {
1331 	vnet_t *vnetp = arg;
1332 
1333 	WRITE_ENTER(&vnetp->vrwlock);
1334 	if (vnetp->flags & VNET_STARTED) {
1335 		vnet_start_resources(vnetp);
1336 	}
1337 	RW_EXIT(&vnetp->vrwlock);
1338 }
1339 
1340 /*
1341  * vnet_start_resources -- starts all resources associated with
1342  *	a vnet.
1343  */
1344 static void
1345 vnet_start_resources(vnet_t *vnetp)
1346 {
1347 	mac_register_t	*macp;
1348 	mac_callbacks_t	*cbp;
1349 	vnet_res_t	*vresp;
1350 	int rv;
1351 
1352 	DBG1(vnetp, "enter\n");
1353 
1354 	for (vresp = vnetp->vres_list; vresp != NULL; vresp = vresp->nextp) {
1355 		/* skip if it is already started */
1356 		if (vresp->flags & VNET_STARTED) {
1357 			continue;
1358 		}
1359 		macp = &vresp->macreg;
1360 		cbp = macp->m_callbacks;
1361 		rv = cbp->mc_start(macp->m_driver);
1362 		if (rv == 0) {
1363 			/*
1364 			 * Successfully started the resource, so now
1365 			 * add it to the fdb.
1366 			 */
1367 			vresp->flags |= VNET_STARTED;
1368 			vnet_fdbe_add(vnetp, vresp);
1369 		}
1370 	}
1371 
1372 	DBG1(vnetp, "exit\n");
1373 
1374 }
1375 
1376 /*
1377  * vnet_stop_resources -- stop all resources associated with a vnet.
1378  */
1379 static void
1380 vnet_stop_resources(vnet_t *vnetp)
1381 {
1382 	vnet_res_t	*vresp;
1383 	vnet_res_t	*nvresp;
1384 	mac_register_t	*macp;
1385 	mac_callbacks_t	*cbp;
1386 
1387 	DBG1(vnetp, "enter\n");
1388 
1389 	for (vresp = vnetp->vres_list; vresp != NULL; ) {
1390 		nvresp = vresp->nextp;
1391 		if (vresp->flags & VNET_STARTED) {
1392 			macp = &vresp->macreg;
1393 			cbp = macp->m_callbacks;
1394 			cbp->mc_stop(macp->m_driver);
1395 			vresp->flags &= ~VNET_STARTED;
1396 		}
1397 		vresp = nvresp;
1398 	}
1399 	DBG1(vnetp, "exit\n");
1400 }
1401 
1402 /*
1403  * Setup kstats for the HIO statistics.
1404  * NOTE: the synchronization for the statistics is the
1405  * responsibility of the caller.
1406  */
1407 kstat_t *
1408 vnet_hio_setup_kstats(char *ks_mod, char *ks_name, vnet_res_t *vresp)
1409 {
1410 	kstat_t *ksp;
1411 	vnet_t *vnetp = vresp->vnetp;
1412 	vnet_hio_kstats_t *hiokp;
1413 	size_t size;
1414 
1415 	ASSERT(vnetp != NULL);
1416 	size = sizeof (vnet_hio_kstats_t) / sizeof (kstat_named_t);
1417 	ksp = kstat_create(ks_mod, vnetp->instance, ks_name, "net",
1418 	    KSTAT_TYPE_NAMED, size, 0);
1419 	if (ksp == NULL) {
1420 		return (NULL);
1421 	}
1422 
1423 	hiokp = (vnet_hio_kstats_t *)ksp->ks_data;
1424 	kstat_named_init(&hiokp->ipackets,		"ipackets",
1425 	    KSTAT_DATA_ULONG);
1426 	kstat_named_init(&hiokp->ierrors,		"ierrors",
1427 	    KSTAT_DATA_ULONG);
1428 	kstat_named_init(&hiokp->opackets,		"opackets",
1429 	    KSTAT_DATA_ULONG);
1430 	kstat_named_init(&hiokp->oerrors,		"oerrors",
1431 	    KSTAT_DATA_ULONG);
1432 
1433 
1434 	/* MIB II kstat variables */
1435 	kstat_named_init(&hiokp->rbytes,		"rbytes",
1436 	    KSTAT_DATA_ULONG);
1437 	kstat_named_init(&hiokp->obytes,		"obytes",
1438 	    KSTAT_DATA_ULONG);
1439 	kstat_named_init(&hiokp->multircv,		"multircv",
1440 	    KSTAT_DATA_ULONG);
1441 	kstat_named_init(&hiokp->multixmt,		"multixmt",
1442 	    KSTAT_DATA_ULONG);
1443 	kstat_named_init(&hiokp->brdcstrcv,		"brdcstrcv",
1444 	    KSTAT_DATA_ULONG);
1445 	kstat_named_init(&hiokp->brdcstxmt,		"brdcstxmt",
1446 	    KSTAT_DATA_ULONG);
1447 	kstat_named_init(&hiokp->norcvbuf,		"norcvbuf",
1448 	    KSTAT_DATA_ULONG);
1449 	kstat_named_init(&hiokp->noxmtbuf,		"noxmtbuf",
1450 	    KSTAT_DATA_ULONG);
1451 
1452 	ksp->ks_update = vnet_hio_update_kstats;
1453 	ksp->ks_private = (void *)vresp;
1454 	kstat_install(ksp);
1455 	return (ksp);
1456 }
1457 
1458 /*
1459  * Destroy kstats.
1460  */
1461 static void
1462 vnet_hio_destroy_kstats(kstat_t *ksp)
1463 {
1464 	if (ksp != NULL)
1465 		kstat_delete(ksp);
1466 }
1467 
1468 /*
1469  * Update the kstats.
1470  */
1471 static int
1472 vnet_hio_update_kstats(kstat_t *ksp, int rw)
1473 {
1474 	vnet_t *vnetp;
1475 	vnet_res_t *vresp;
1476 	vnet_hio_stats_t statsp;
1477 	vnet_hio_kstats_t *hiokp;
1478 
1479 	vresp = (vnet_res_t *)ksp->ks_private;
1480 	vnetp = vresp->vnetp;
1481 
1482 	bzero(&statsp, sizeof (vnet_hio_stats_t));
1483 
1484 	READ_ENTER(&vnetp->vsw_fp_rw);
1485 	if (vnetp->hio_fp == NULL) {
1486 		/* not using hio resources, just return */
1487 		RW_EXIT(&vnetp->vsw_fp_rw);
1488 		return (0);
1489 	}
1490 	VNET_FDBE_REFHOLD(vnetp->hio_fp);
1491 	RW_EXIT(&vnetp->vsw_fp_rw);
1492 	vnet_hio_get_stats(vnetp->hio_fp, &statsp);
1493 	VNET_FDBE_REFRELE(vnetp->hio_fp);
1494 
1495 	hiokp = (vnet_hio_kstats_t *)ksp->ks_data;
1496 
1497 	if (rw == KSTAT_READ) {
1498 		/* Link Input/Output stats */
1499 		hiokp->ipackets.value.ul	= (uint32_t)statsp.ipackets;
1500 		hiokp->ipackets64.value.ull	= statsp.ipackets;
1501 		hiokp->ierrors.value.ul		= statsp.ierrors;
1502 		hiokp->opackets.value.ul	= (uint32_t)statsp.opackets;
1503 		hiokp->opackets64.value.ull	= statsp.opackets;
1504 		hiokp->oerrors.value.ul		= statsp.oerrors;
1505 
1506 		/* MIB II kstat variables */
1507 		hiokp->rbytes.value.ul		= (uint32_t)statsp.rbytes;
1508 		hiokp->rbytes64.value.ull	= statsp.rbytes;
1509 		hiokp->obytes.value.ul		= (uint32_t)statsp.obytes;
1510 		hiokp->obytes64.value.ull	= statsp.obytes;
1511 		hiokp->multircv.value.ul	= statsp.multircv;
1512 		hiokp->multixmt.value.ul	= statsp.multixmt;
1513 		hiokp->brdcstrcv.value.ul	= statsp.brdcstrcv;
1514 		hiokp->brdcstxmt.value.ul	= statsp.brdcstxmt;
1515 		hiokp->norcvbuf.value.ul	= statsp.norcvbuf;
1516 		hiokp->noxmtbuf.value.ul	= statsp.noxmtbuf;
1517 	} else {
1518 		return (EACCES);
1519 	}
1520 
1521 	return (0);
1522 }
1523 
1524 static void
1525 vnet_hio_get_stats(vnet_res_t *vresp, vnet_hio_stats_t *statsp)
1526 {
1527 	mac_register_t		*macp;
1528 	mac_callbacks_t		*cbp;
1529 	uint64_t		val;
1530 	int			stat;
1531 
1532 	/*
1533 	 * get the specified statistics from the underlying nxge.
1534 	 */
1535 	macp = &vresp->macreg;
1536 	cbp = macp->m_callbacks;
1537 	for (stat = MAC_STAT_MIN; stat < MAC_STAT_OVERFLOWS; stat++) {
1538 		if (cbp->mc_getstat(macp->m_driver, stat, &val) == 0) {
1539 			switch (stat) {
1540 			case MAC_STAT_IPACKETS:
1541 				statsp->ipackets = val;
1542 				break;
1543 
1544 			case MAC_STAT_IERRORS:
1545 				statsp->ierrors = val;
1546 				break;
1547 
1548 			case MAC_STAT_OPACKETS:
1549 				statsp->opackets = val;
1550 				break;
1551 
1552 			case MAC_STAT_OERRORS:
1553 				statsp->oerrors = val;
1554 				break;
1555 
1556 			case MAC_STAT_RBYTES:
1557 				statsp->rbytes = val;
1558 				break;
1559 
1560 			case MAC_STAT_OBYTES:
1561 				statsp->obytes = val;
1562 				break;
1563 
1564 			case MAC_STAT_MULTIRCV:
1565 				statsp->multircv = val;
1566 				break;
1567 
1568 			case MAC_STAT_MULTIXMT:
1569 				statsp->multixmt = val;
1570 				break;
1571 
1572 			case MAC_STAT_BRDCSTRCV:
1573 				statsp->brdcstrcv = val;
1574 				break;
1575 
1576 			case MAC_STAT_BRDCSTXMT:
1577 				statsp->brdcstxmt = val;
1578 				break;
1579 
1580 			case MAC_STAT_NOXMTBUF:
1581 				statsp->noxmtbuf = val;
1582 				break;
1583 
1584 			case MAC_STAT_NORCVBUF:
1585 				statsp->norcvbuf = val;
1586 				break;
1587 
1588 			default:
1589 				/*
1590 				 * parameters not interested.
1591 				 */
1592 				break;
1593 			}
1594 		}
1595 	}
1596 }
1597