xref: /titanic_41/usr/src/uts/sun4v/io/vnet.c (revision cde2885fdf538266ee2a3b08dee2d5075ce8fa2b)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #include <sys/types.h>
28 #include <sys/errno.h>
29 #include <sys/param.h>
30 #include <sys/stream.h>
31 #include <sys/kmem.h>
32 #include <sys/conf.h>
33 #include <sys/devops.h>
34 #include <sys/ksynch.h>
35 #include <sys/stat.h>
36 #include <sys/modctl.h>
37 #include <sys/modhash.h>
38 #include <sys/debug.h>
39 #include <sys/ethernet.h>
40 #include <sys/dlpi.h>
41 #include <net/if.h>
42 #include <sys/mac_provider.h>
43 #include <sys/mac_ether.h>
44 #include <sys/ddi.h>
45 #include <sys/sunddi.h>
46 #include <sys/strsun.h>
47 #include <sys/note.h>
48 #include <sys/atomic.h>
49 #include <sys/vnet.h>
50 #include <sys/vlan.h>
51 #include <sys/vnet_mailbox.h>
52 #include <sys/vnet_common.h>
53 #include <sys/dds.h>
54 #include <sys/strsubr.h>
55 #include <sys/taskq.h>
56 
57 /*
58  * Function prototypes.
59  */
60 
61 /* DDI entrypoints */
62 static int vnetdevinfo(dev_info_t *, ddi_info_cmd_t, void *, void **);
63 static int vnetattach(dev_info_t *, ddi_attach_cmd_t);
64 static int vnetdetach(dev_info_t *, ddi_detach_cmd_t);
65 
66 /* MAC entrypoints  */
67 static int vnet_m_stat(void *, uint_t, uint64_t *);
68 static int vnet_m_start(void *);
69 static void vnet_m_stop(void *);
70 static int vnet_m_promisc(void *, boolean_t);
71 static int vnet_m_multicst(void *, boolean_t, const uint8_t *);
72 static int vnet_m_unicst(void *, const uint8_t *);
73 mblk_t *vnet_m_tx(void *, mblk_t *);
74 
75 /* vnet internal functions */
76 static int vnet_unattach(vnet_t *vnetp);
77 static int vnet_mac_register(vnet_t *);
78 static int vnet_read_mac_address(vnet_t *vnetp);
79 
80 /* Forwarding database (FDB) routines */
81 static void vnet_fdb_create(vnet_t *vnetp);
82 static void vnet_fdb_destroy(vnet_t *vnetp);
83 static vnet_res_t *vnet_fdbe_find(vnet_t *vnetp, struct ether_addr *addrp);
84 static void vnet_fdbe_find_cb(mod_hash_key_t key, mod_hash_val_t val);
85 void vnet_fdbe_add(vnet_t *vnetp, vnet_res_t *vresp);
86 static void vnet_fdbe_del(vnet_t *vnetp, vnet_res_t *vresp);
87 
88 static void vnet_rx_frames_untag(uint16_t pvid, mblk_t **mp);
89 static void vnet_rx(vio_net_handle_t vrh, mblk_t *mp);
90 static void vnet_tx_update(vio_net_handle_t vrh);
91 static void vnet_res_start_task(void *arg);
92 static void vnet_start_resources(vnet_t *vnetp);
93 static void vnet_stop_resources(vnet_t *vnetp);
94 static void vnet_dispatch_res_task(vnet_t *vnetp);
95 static void vnet_res_start_task(void *arg);
96 static void vnet_handle_res_err(vio_net_handle_t vrh, vio_net_err_val_t err);
97 int vnet_mtu_update(vnet_t *vnetp, uint32_t mtu);
98 
99 static kstat_t *vnet_hio_setup_kstats(char *ks_mod, char *ks_name,
100     vnet_res_t *vresp);
101 static int vnet_hio_update_kstats(kstat_t *ksp, int rw);
102 static void vnet_hio_get_stats(vnet_res_t *vresp, vnet_hio_stats_t *statsp);
103 static void vnet_hio_destroy_kstats(kstat_t *ksp);
104 
105 /* Exported to to vnet_dds */
106 int vnet_send_dds_msg(vnet_t *vnetp, void *dmsg);
107 
108 /* Externs that are imported from vnet_gen */
109 extern int vgen_init(void *vnetp, uint64_t regprop, dev_info_t *vnetdip,
110     const uint8_t *macaddr, void **vgenhdl);
111 extern void vgen_uninit(void *arg);
112 extern int vgen_dds_tx(void *arg, void *dmsg);
113 extern void vgen_mod_init(void);
114 extern int vgen_mod_cleanup(void);
115 extern void vgen_mod_fini(void);
116 
117 /* Externs that are imported from vnet_dds */
118 extern void vdds_mod_init(void);
119 extern void vdds_mod_fini(void);
120 extern int vdds_init(vnet_t *vnetp);
121 extern void vdds_cleanup(vnet_t *vnetp);
122 extern void vdds_process_dds_msg(vnet_t *vnetp, vio_dds_msg_t *dmsg);
123 extern void vdds_cleanup_hybrid_res(void *arg);
124 
#define	DRV_NAME	"vnet"

/*
 * Take a reference on an fdb entry (vnet_res_t). The macro body is wrapped
 * in do { } while (0) so that it expands to a single statement and can be
 * used safely in unbraced if/else constructs.
 */
#define	VNET_FDBE_REFHOLD(p)						\
do {									\
	atomic_inc_32(&(p)->refcnt);					\
	ASSERT((p)->refcnt != 0);					\
	_NOTE(CONSTCOND)						\
} while (0)

/*
 * Drop a reference previously taken with VNET_FDBE_REFHOLD(). Asserts that
 * a reference is actually held before decrementing.
 */
#define	VNET_FDBE_REFRELE(p)						\
do {									\
	ASSERT((p)->refcnt != 0);					\
	atomic_dec_32(&(p)->refcnt);					\
	_NOTE(CONSTCOND)						\
} while (0)
137 
138 static mac_callbacks_t vnet_m_callbacks = {
139 	0,
140 	vnet_m_stat,
141 	vnet_m_start,
142 	vnet_m_stop,
143 	vnet_m_promisc,
144 	vnet_m_multicst,
145 	vnet_m_unicst,
146 	vnet_m_tx,
147 	NULL,
148 	NULL,
149 	NULL
150 };
151 
152 /*
153  * Linked list of "vnet_t" structures - one per instance.
154  */
155 static vnet_t	*vnet_headp = NULL;
156 static krwlock_t vnet_rw;
157 
158 /* Tunables */
159 uint32_t vnet_ntxds = VNET_NTXDS;	/* power of 2 transmit descriptors */
160 uint32_t vnet_ldcwd_interval = VNET_LDCWD_INTERVAL; /* watchdog freq in msec */
161 uint32_t vnet_ldcwd_txtimeout = VNET_LDCWD_TXTIMEOUT;  /* tx timeout in msec */
162 uint32_t vnet_ldc_mtu = VNET_LDC_MTU;		/* ldc mtu */
163 
164 /*
165  * Set this to non-zero to enable additional internal receive buffer pools
166  * based on the MTU of the device for better performance at the cost of more
167  * memory consumption. This is turned off by default, to use allocb(9F) for
168  * receive buffer allocations of sizes > 2K.
169  */
170 boolean_t vnet_jumbo_rxpools = B_FALSE;
171 
172 /* # of chains in fdb hash table */
173 uint32_t	vnet_fdb_nchains = VNET_NFDB_HASH;
174 
175 /* Internal tunables */
176 uint32_t	vnet_ethermtu = 1500;	/* mtu of the device */
177 
178 /*
179  * Default vlan id. This is only used internally when the "default-vlan-id"
180  * property is not present in the MD device node. Therefore, this should not be
181  * used as a tunable; if this value is changed, the corresponding variable
182  * should be updated to the same value in vsw and also other vnets connected to
183  * the same vsw.
184  */
185 uint16_t	vnet_default_vlan_id = 1;
186 
187 /* delay in usec to wait for all references on a fdb entry to be dropped */
188 uint32_t vnet_fdbe_refcnt_delay = 10;
189 
190 static struct ether_addr etherbroadcastaddr = {
191 	0xff, 0xff, 0xff, 0xff, 0xff, 0xff
192 };
193 
194 
195 /*
196  * Property names
197  */
198 static char macaddr_propname[] = "local-mac-address";
199 
200 /*
201  * This is the string displayed by modinfo(1m).
202  */
203 static char vnet_ident[] = "vnet driver";
204 extern struct mod_ops mod_driverops;
205 static struct cb_ops cb_vnetops = {
206 	nulldev,		/* cb_open */
207 	nulldev,		/* cb_close */
208 	nodev,			/* cb_strategy */
209 	nodev,			/* cb_print */
210 	nodev,			/* cb_dump */
211 	nodev,			/* cb_read */
212 	nodev,			/* cb_write */
213 	nodev,			/* cb_ioctl */
214 	nodev,			/* cb_devmap */
215 	nodev,			/* cb_mmap */
216 	nodev,			/* cb_segmap */
217 	nochpoll,		/* cb_chpoll */
218 	ddi_prop_op,		/* cb_prop_op */
219 	NULL,			/* cb_stream */
220 	(int)(D_MP)		/* cb_flag */
221 };
222 
223 static struct dev_ops vnetops = {
224 	DEVO_REV,		/* devo_rev */
225 	0,			/* devo_refcnt */
226 	NULL,			/* devo_getinfo */
227 	nulldev,		/* devo_identify */
228 	nulldev,		/* devo_probe */
229 	vnetattach,		/* devo_attach */
230 	vnetdetach,		/* devo_detach */
231 	nodev,			/* devo_reset */
232 	&cb_vnetops,		/* devo_cb_ops */
233 	(struct bus_ops *)NULL,	/* devo_bus_ops */
234 	NULL,			/* devo_power */
235 	ddi_quiesce_not_supported,	/* devo_quiesce */
236 };
237 
238 static struct modldrv modldrv = {
239 	&mod_driverops,		/* Type of module.  This one is a driver */
240 	vnet_ident,		/* ID string */
241 	&vnetops		/* driver specific ops */
242 };
243 
244 static struct modlinkage modlinkage = {
245 	MODREV_1, (void *)&modldrv, NULL
246 };
247 
#ifdef DEBUG

/*
 * Print debug messages - set to 0xf to enable all msgs
 */
int vnet_dbglevel = 0x8;

/*
 * Format and emit a debug message through cmn_err(CE_CONT).
 *
 *   fname: name of the calling function, used as a message prefix
 *   arg:   vnet_t pointer of the instance, or NULL when no instance applies
 *   fmt:   printf-style format followed by its arguments
 *
 * The message is assembled in a fixed-size stack buffer. All formatting is
 * bounded by the buffer size, so an over-long message is truncated instead
 * of overrunning the stack.
 */
static void
debug_printf(const char *fname, void *arg, const char *fmt, ...)
{
	char	buf[512];
	va_list	ap;
	vnet_t	*vnetp = (vnet_t *)arg;
	size_t	len;

	/* Prefix: "<fname>: " or "vnet<instance>:<fname>: ". */
	if (vnetp == NULL) {
		(void) snprintf(buf, sizeof (buf), "%s: ", fname);
	} else {
		(void) snprintf(buf, sizeof (buf), "vnet%d:%s: ",
		    vnetp->instance, fname);
	}

	/* snprintf always terminates, so len is at most sizeof (buf) - 1. */
	len = strlen(buf);

	va_start(ap, fmt);
	(void) vsnprintf(buf + len, sizeof (buf) - len, fmt, ap);
	va_end(ap);
	cmn_err(CE_CONT, "%s\n", buf);
}

#endif
277 
278 /* _init(9E): initialize the loadable module */
279 int
280 _init(void)
281 {
282 	int status;
283 
284 	DBG1(NULL, "enter\n");
285 
286 	mac_init_ops(&vnetops, "vnet");
287 	status = mod_install(&modlinkage);
288 	if (status != 0) {
289 		mac_fini_ops(&vnetops);
290 	}
291 	vdds_mod_init();
292 	vgen_mod_init();
293 	DBG1(NULL, "exit(%d)\n", status);
294 	return (status);
295 }
296 
297 /* _fini(9E): prepare the module for unloading. */
298 int
299 _fini(void)
300 {
301 	int		status;
302 
303 	DBG1(NULL, "enter\n");
304 
305 	status = vgen_mod_cleanup();
306 	if (status != 0)
307 		return (status);
308 
309 	status = mod_remove(&modlinkage);
310 	if (status != 0)
311 		return (status);
312 	mac_fini_ops(&vnetops);
313 	vgen_mod_fini();
314 	vdds_mod_fini();
315 
316 	DBG1(NULL, "exit(%d)\n", status);
317 	return (status);
318 }
319 
320 /* _info(9E): return information about the loadable module */
321 int
322 _info(struct modinfo *modinfop)
323 {
324 	return (mod_info(&modlinkage, modinfop));
325 }
326 
327 /*
328  * attach(9E): attach a device to the system.
329  * called once for each instance of the device on the system.
330  */
331 static int
332 vnetattach(dev_info_t *dip, ddi_attach_cmd_t cmd)
333 {
334 	vnet_t			*vnetp;
335 	int			status;
336 	int			instance;
337 	uint64_t		reg;
338 	char			qname[TASKQ_NAMELEN];
339 	vnet_attach_progress_t	attach_progress;
340 
341 	attach_progress = AST_init;
342 
343 	switch (cmd) {
344 	case DDI_ATTACH:
345 		break;
346 	case DDI_RESUME:
347 	case DDI_PM_RESUME:
348 	default:
349 		goto vnet_attach_fail;
350 	}
351 
352 	instance = ddi_get_instance(dip);
353 	DBG1(NULL, "instance(%d) enter\n", instance);
354 
355 	/* allocate vnet_t and mac_t structures */
356 	vnetp = kmem_zalloc(sizeof (vnet_t), KM_SLEEP);
357 	vnetp->dip = dip;
358 	vnetp->instance = instance;
359 	rw_init(&vnetp->vrwlock, NULL, RW_DRIVER, NULL);
360 	rw_init(&vnetp->vsw_fp_rw, NULL, RW_DRIVER, NULL);
361 	attach_progress |= AST_vnet_alloc;
362 
363 	status = vdds_init(vnetp);
364 	if (status != 0) {
365 		goto vnet_attach_fail;
366 	}
367 	attach_progress |= AST_vdds_init;
368 
369 	/* setup links to vnet_t from both devinfo and mac_t */
370 	ddi_set_driver_private(dip, (caddr_t)vnetp);
371 
372 	/* read the mac address */
373 	status = vnet_read_mac_address(vnetp);
374 	if (status != DDI_SUCCESS) {
375 		goto vnet_attach_fail;
376 	}
377 	attach_progress |= AST_read_macaddr;
378 
379 	reg = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
380 	    DDI_PROP_DONTPASS, "reg", -1);
381 	if (reg == -1) {
382 		goto vnet_attach_fail;
383 	}
384 	vnetp->reg = reg;
385 
386 	vnet_fdb_create(vnetp);
387 	attach_progress |= AST_fdbh_alloc;
388 
389 	(void) snprintf(qname, TASKQ_NAMELEN, "vnet_taskq%d", instance);
390 	if ((vnetp->taskqp = ddi_taskq_create(dip, qname, 1,
391 	    TASKQ_DEFAULTPRI, 0)) == NULL) {
392 		cmn_err(CE_WARN, "!vnet%d: Unable to create task queue",
393 		    instance);
394 		goto vnet_attach_fail;
395 	}
396 	attach_progress |= AST_taskq_create;
397 
398 	/* add to the list of vnet devices */
399 	WRITE_ENTER(&vnet_rw);
400 	vnetp->nextp = vnet_headp;
401 	vnet_headp = vnetp;
402 	RW_EXIT(&vnet_rw);
403 
404 	attach_progress |= AST_vnet_list;
405 
406 	/*
407 	 * Initialize the generic vnet plugin which provides
408 	 * communication via sun4v LDC (logical domain channel) based
409 	 * resources. It will register the LDC resources as and when
410 	 * they become available.
411 	 */
412 	status = vgen_init(vnetp, reg, vnetp->dip,
413 	    (uint8_t *)vnetp->curr_macaddr, &vnetp->vgenhdl);
414 	if (status != DDI_SUCCESS) {
415 		DERR(vnetp, "vgen_init() failed\n");
416 		goto vnet_attach_fail;
417 	}
418 	attach_progress |= AST_vgen_init;
419 
420 	/* register with MAC layer */
421 	status = vnet_mac_register(vnetp);
422 	if (status != DDI_SUCCESS) {
423 		goto vnet_attach_fail;
424 	}
425 
426 	attach_progress |= AST_macreg;
427 
428 	vnetp->attach_progress = attach_progress;
429 
430 	DBG1(NULL, "instance(%d) exit\n", instance);
431 	return (DDI_SUCCESS);
432 
433 vnet_attach_fail:
434 	vnetp->attach_progress = attach_progress;
435 	status = vnet_unattach(vnetp);
436 	ASSERT(status == 0);
437 	return (DDI_FAILURE);
438 }
439 
440 /*
441  * detach(9E): detach a device from the system.
442  */
443 static int
444 vnetdetach(dev_info_t *dip, ddi_detach_cmd_t cmd)
445 {
446 	vnet_t		*vnetp;
447 	int		instance;
448 
449 	instance = ddi_get_instance(dip);
450 	DBG1(NULL, "instance(%d) enter\n", instance);
451 
452 	vnetp = ddi_get_driver_private(dip);
453 	if (vnetp == NULL) {
454 		goto vnet_detach_fail;
455 	}
456 
457 	switch (cmd) {
458 	case DDI_DETACH:
459 		break;
460 	case DDI_SUSPEND:
461 	case DDI_PM_SUSPEND:
462 	default:
463 		goto vnet_detach_fail;
464 	}
465 
466 	if (vnet_unattach(vnetp) != 0) {
467 		goto vnet_detach_fail;
468 	}
469 
470 	return (DDI_SUCCESS);
471 
472 vnet_detach_fail:
473 	return (DDI_FAILURE);
474 }
475 
476 /*
477  * Common routine to handle vnetattach() failure and vnetdetach(). Note that
478  * the only reason this function could fail is if mac_unregister() fails.
479  * Otherwise, this function must ensure that all resources are freed and return
480  * success.
481  */
482 static int
483 vnet_unattach(vnet_t *vnetp)
484 {
485 	vnet_attach_progress_t	attach_progress;
486 
487 	attach_progress = vnetp->attach_progress;
488 
489 	/*
490 	 * Unregister from the gldv3 subsystem. This can fail, in particular
491 	 * if there are still any open references to this mac device; in which
492 	 * case we just return failure without continuing to detach further.
493 	 */
494 	if (attach_progress & AST_macreg) {
495 		if (mac_unregister(vnetp->mh) != 0) {
496 			return (1);
497 		}
498 		attach_progress &= ~AST_macreg;
499 	}
500 
501 	/*
502 	 * Now that we have unregistered from gldv3, we must finish all other
503 	 * steps and successfully return from this function; otherwise we will
504 	 * end up leaving the device in a broken/unusable state.
505 	 *
506 	 * First, release any hybrid resources assigned to this vnet device.
507 	 */
508 	if (attach_progress & AST_vdds_init) {
509 		vdds_cleanup(vnetp);
510 		attach_progress &= ~AST_vdds_init;
511 	}
512 
513 	/*
514 	 * Uninit vgen. This stops further mdeg callbacks to this vnet
515 	 * device and/or its ports; and detaches any existing ports.
516 	 */
517 	if (attach_progress & AST_vgen_init) {
518 		vgen_uninit(vnetp->vgenhdl);
519 		attach_progress &= ~AST_vgen_init;
520 	}
521 
522 	/* Destroy the taskq. */
523 	if (attach_progress & AST_taskq_create) {
524 		ddi_taskq_destroy(vnetp->taskqp);
525 		attach_progress &= ~AST_taskq_create;
526 	}
527 
528 	/* Destroy fdb. */
529 	if (attach_progress & AST_fdbh_alloc) {
530 		vnet_fdb_destroy(vnetp);
531 		attach_progress &= ~AST_fdbh_alloc;
532 	}
533 
534 	/* Remove from the device list */
535 	if (attach_progress & AST_vnet_list) {
536 		vnet_t		**vnetpp;
537 		/* unlink from instance(vnet_t) list */
538 		WRITE_ENTER(&vnet_rw);
539 		for (vnetpp = &vnet_headp; *vnetpp;
540 		    vnetpp = &(*vnetpp)->nextp) {
541 			if (*vnetpp == vnetp) {
542 				*vnetpp = vnetp->nextp;
543 				break;
544 			}
545 		}
546 		RW_EXIT(&vnet_rw);
547 		attach_progress &= ~AST_vnet_list;
548 	}
549 
550 	if (attach_progress & AST_vnet_alloc) {
551 		rw_destroy(&vnetp->vrwlock);
552 		rw_destroy(&vnetp->vsw_fp_rw);
553 		attach_progress &= ~AST_vnet_list;
554 		KMEM_FREE(vnetp);
555 	}
556 
557 	return (0);
558 }
559 
560 /* enable the device for transmit/receive */
561 static int
562 vnet_m_start(void *arg)
563 {
564 	vnet_t		*vnetp = arg;
565 
566 	DBG1(vnetp, "enter\n");
567 
568 	WRITE_ENTER(&vnetp->vrwlock);
569 	vnetp->flags |= VNET_STARTED;
570 	vnet_start_resources(vnetp);
571 	RW_EXIT(&vnetp->vrwlock);
572 
573 	DBG1(vnetp, "exit\n");
574 	return (VNET_SUCCESS);
575 
576 }
577 
578 /* stop transmit/receive for the device */
579 static void
580 vnet_m_stop(void *arg)
581 {
582 	vnet_t		*vnetp = arg;
583 
584 	DBG1(vnetp, "enter\n");
585 
586 	WRITE_ENTER(&vnetp->vrwlock);
587 	if (vnetp->flags & VNET_STARTED) {
588 		vnet_stop_resources(vnetp);
589 		vnetp->flags &= ~VNET_STARTED;
590 	}
591 	RW_EXIT(&vnetp->vrwlock);
592 
593 	DBG1(vnetp, "exit\n");
594 }
595 
596 /* set the unicast mac address of the device */
597 static int
598 vnet_m_unicst(void *arg, const uint8_t *macaddr)
599 {
600 	_NOTE(ARGUNUSED(macaddr))
601 
602 	vnet_t *vnetp = arg;
603 
604 	DBG1(vnetp, "enter\n");
605 	/*
606 	 * NOTE: setting mac address dynamically is not supported.
607 	 */
608 	DBG1(vnetp, "exit\n");
609 
610 	return (VNET_FAILURE);
611 }
612 
613 /* enable/disable a multicast address */
614 static int
615 vnet_m_multicst(void *arg, boolean_t add, const uint8_t *mca)
616 {
617 	_NOTE(ARGUNUSED(add, mca))
618 
619 	vnet_t *vnetp = arg;
620 	vnet_res_t	*vresp;
621 	mac_register_t	*macp;
622 	mac_callbacks_t	*cbp;
623 	int rv = VNET_SUCCESS;
624 
625 	DBG1(vnetp, "enter\n");
626 
627 	READ_ENTER(&vnetp->vrwlock);
628 	for (vresp = vnetp->vres_list; vresp != NULL; vresp = vresp->nextp) {
629 		if (vresp->type == VIO_NET_RES_LDC_SERVICE) {
630 			macp = &vresp->macreg;
631 			cbp = macp->m_callbacks;
632 			rv = cbp->mc_multicst(macp->m_driver, add, mca);
633 		}
634 	}
635 	RW_EXIT(&vnetp->vrwlock);
636 
637 	DBG1(vnetp, "exit(%d)\n", rv);
638 	return (rv);
639 }
640 
641 /* set or clear promiscuous mode on the device */
642 static int
643 vnet_m_promisc(void *arg, boolean_t on)
644 {
645 	_NOTE(ARGUNUSED(on))
646 
647 	vnet_t *vnetp = arg;
648 	DBG1(vnetp, "enter\n");
649 	/*
650 	 * NOTE: setting promiscuous mode is not supported, just return success.
651 	 */
652 	DBG1(vnetp, "exit\n");
653 	return (VNET_SUCCESS);
654 }
655 
656 /*
657  * Transmit a chain of packets. This function provides switching functionality
658  * based on the destination mac address to reach other guests (within ldoms) or
659  * external hosts.
660  */
661 mblk_t *
662 vnet_m_tx(void *arg, mblk_t *mp)
663 {
664 	vnet_t			*vnetp;
665 	vnet_res_t		*vresp;
666 	mblk_t			*next;
667 	mblk_t			*resid_mp;
668 	mac_register_t		*macp;
669 	struct ether_header	*ehp;
670 	boolean_t		is_unicast;
671 	boolean_t		is_pvid;	/* non-default pvid ? */
672 	boolean_t		hres;		/* Hybrid resource ? */
673 
674 	vnetp = (vnet_t *)arg;
675 	DBG1(vnetp, "enter\n");
676 	ASSERT(mp != NULL);
677 
678 	is_pvid = (vnetp->pvid != vnetp->default_vlan_id) ? B_TRUE : B_FALSE;
679 
680 	while (mp != NULL) {
681 
682 		next = mp->b_next;
683 		mp->b_next = NULL;
684 
685 		/*
686 		 * Find fdb entry for the destination
687 		 * and hold a reference to it.
688 		 */
689 		ehp = (struct ether_header *)mp->b_rptr;
690 		vresp = vnet_fdbe_find(vnetp, &ehp->ether_dhost);
691 		if (vresp != NULL) {
692 
693 			/*
694 			 * Destination found in FDB.
695 			 * The destination is a vnet device within ldoms
696 			 * and directly reachable, invoke the tx function
697 			 * in the fdb entry.
698 			 */
699 			macp = &vresp->macreg;
700 			resid_mp = macp->m_callbacks->mc_tx(macp->m_driver, mp);
701 
702 			/* tx done; now release ref on fdb entry */
703 			VNET_FDBE_REFRELE(vresp);
704 
705 			if (resid_mp != NULL) {
706 				/* m_tx failed */
707 				mp->b_next = next;
708 				break;
709 			}
710 		} else {
711 			is_unicast = !(IS_BROADCAST(ehp) ||
712 			    (IS_MULTICAST(ehp)));
713 			/*
714 			 * Destination is not in FDB.
715 			 * If the destination is broadcast or multicast,
716 			 * then forward the packet to vswitch.
717 			 * If a Hybrid resource avilable, then send the
718 			 * unicast packet via hybrid resource, otherwise
719 			 * forward it to vswitch.
720 			 */
721 			READ_ENTER(&vnetp->vsw_fp_rw);
722 
723 			if ((is_unicast) && (vnetp->hio_fp != NULL)) {
724 				vresp = vnetp->hio_fp;
725 				hres = B_TRUE;
726 			} else {
727 				vresp = vnetp->vsw_fp;
728 				hres = B_FALSE;
729 			}
730 			if (vresp == NULL) {
731 				/*
732 				 * no fdb entry to vsw? drop the packet.
733 				 */
734 				RW_EXIT(&vnetp->vsw_fp_rw);
735 				freemsg(mp);
736 				mp = next;
737 				continue;
738 			}
739 
740 			/* ref hold the fdb entry to vsw */
741 			VNET_FDBE_REFHOLD(vresp);
742 
743 			RW_EXIT(&vnetp->vsw_fp_rw);
744 
745 			/*
746 			 * In the case of a hybrid resource we need to insert
747 			 * the tag for the pvid case here; unlike packets that
748 			 * are destined to a vnet/vsw in which case the vgen
749 			 * layer does the tagging before sending it over ldc.
750 			 */
751 			if (hres == B_TRUE) {
752 				/*
753 				 * Determine if the frame being transmitted
754 				 * over the hybrid resource is untagged. If so,
755 				 * insert the tag before transmitting.
756 				 */
757 				if (is_pvid == B_TRUE &&
758 				    ehp->ether_type != htons(ETHERTYPE_VLAN)) {
759 
760 					mp = vnet_vlan_insert_tag(mp,
761 					    vnetp->pvid);
762 					if (mp == NULL) {
763 						VNET_FDBE_REFRELE(vresp);
764 						mp = next;
765 						continue;
766 					}
767 
768 				}
769 			}
770 
771 			macp = &vresp->macreg;
772 			resid_mp = macp->m_callbacks->mc_tx(macp->m_driver, mp);
773 
774 			/* tx done; now release ref on fdb entry */
775 			VNET_FDBE_REFRELE(vresp);
776 
777 			if (resid_mp != NULL) {
778 				/* m_tx failed */
779 				mp->b_next = next;
780 				break;
781 			}
782 		}
783 
784 		mp = next;
785 	}
786 
787 	DBG1(vnetp, "exit\n");
788 	return (mp);
789 }
790 
791 /* get statistics from the device */
792 int
793 vnet_m_stat(void *arg, uint_t stat, uint64_t *val)
794 {
795 	vnet_t *vnetp = arg;
796 	vnet_res_t	*vresp;
797 	mac_register_t	*macp;
798 	mac_callbacks_t	*cbp;
799 	uint64_t val_total = 0;
800 
801 	DBG1(vnetp, "enter\n");
802 
803 	/*
804 	 * get the specified statistic from each transport and return the
805 	 * aggregate val.  This obviously only works for counters.
806 	 */
807 	if ((IS_MAC_STAT(stat) && !MAC_STAT_ISACOUNTER(stat)) ||
808 	    (IS_MACTYPE_STAT(stat) && !ETHER_STAT_ISACOUNTER(stat))) {
809 		return (ENOTSUP);
810 	}
811 
812 	READ_ENTER(&vnetp->vrwlock);
813 	for (vresp = vnetp->vres_list; vresp != NULL; vresp = vresp->nextp) {
814 		macp = &vresp->macreg;
815 		cbp = macp->m_callbacks;
816 		if (cbp->mc_getstat(macp->m_driver, stat, val) == 0)
817 			val_total += *val;
818 	}
819 	RW_EXIT(&vnetp->vrwlock);
820 
821 	*val = val_total;
822 
823 	DBG1(vnetp, "exit\n");
824 	return (0);
825 }
826 
827 /* wrapper function for mac_register() */
828 static int
829 vnet_mac_register(vnet_t *vnetp)
830 {
831 	mac_register_t	*macp;
832 	int		err;
833 
834 	if ((macp = mac_alloc(MAC_VERSION)) == NULL)
835 		return (DDI_FAILURE);
836 	macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
837 	macp->m_driver = vnetp;
838 	macp->m_dip = vnetp->dip;
839 	macp->m_src_addr = vnetp->curr_macaddr;
840 	macp->m_callbacks = &vnet_m_callbacks;
841 	macp->m_min_sdu = 0;
842 	macp->m_max_sdu = vnetp->mtu;
843 	macp->m_margin = VLAN_TAGSZ;
844 
845 	/*
846 	 * Finally, we're ready to register ourselves with the MAC layer
847 	 * interface; if this succeeds, we're all ready to start()
848 	 */
849 	err = mac_register(macp, &vnetp->mh);
850 	mac_free(macp);
851 	return (err == 0 ? DDI_SUCCESS : DDI_FAILURE);
852 }
853 
854 /* read the mac address of the device */
855 static int
856 vnet_read_mac_address(vnet_t *vnetp)
857 {
858 	uchar_t 	*macaddr;
859 	uint32_t 	size;
860 	int 		rv;
861 
862 	rv = ddi_prop_lookup_byte_array(DDI_DEV_T_ANY, vnetp->dip,
863 	    DDI_PROP_DONTPASS, macaddr_propname, &macaddr, &size);
864 	if ((rv != DDI_PROP_SUCCESS) || (size != ETHERADDRL)) {
865 		DWARN(vnetp, "prop_lookup failed(%s) err(%d)\n",
866 		    macaddr_propname, rv);
867 		return (DDI_FAILURE);
868 	}
869 	bcopy(macaddr, (caddr_t)vnetp->vendor_addr, ETHERADDRL);
870 	bcopy(macaddr, (caddr_t)vnetp->curr_macaddr, ETHERADDRL);
871 	ddi_prop_free(macaddr);
872 
873 	return (DDI_SUCCESS);
874 }
875 
876 static void
877 vnet_fdb_create(vnet_t *vnetp)
878 {
879 	char		hashname[MAXNAMELEN];
880 
881 	(void) snprintf(hashname, MAXNAMELEN, "vnet%d-fdbhash",
882 	    vnetp->instance);
883 	vnetp->fdb_nchains = vnet_fdb_nchains;
884 	vnetp->fdb_hashp = mod_hash_create_ptrhash(hashname, vnetp->fdb_nchains,
885 	    mod_hash_null_valdtor, sizeof (void *));
886 }
887 
888 static void
889 vnet_fdb_destroy(vnet_t *vnetp)
890 {
891 	/* destroy fdb-hash-table */
892 	if (vnetp->fdb_hashp != NULL) {
893 		mod_hash_destroy_hash(vnetp->fdb_hashp);
894 		vnetp->fdb_hashp = NULL;
895 		vnetp->fdb_nchains = 0;
896 	}
897 }
898 
899 /*
900  * Add an entry into the fdb.
901  */
902 void
903 vnet_fdbe_add(vnet_t *vnetp, vnet_res_t *vresp)
904 {
905 	uint64_t	addr = 0;
906 	int		rv;
907 
908 	KEY_HASH(addr, vresp->rem_macaddr);
909 
910 	/*
911 	 * If the entry being added corresponds to LDC_SERVICE resource,
912 	 * that is, vswitch connection, it is added to the hash and also
913 	 * the entry is cached, an additional reference count reflects
914 	 * this. The HYBRID resource is not added to the hash, but only
915 	 * cached, as it is only used for sending out packets for unknown
916 	 * unicast destinations.
917 	 */
918 	(vresp->type == VIO_NET_RES_LDC_SERVICE) ?
919 	    (vresp->refcnt = 1) : (vresp->refcnt = 0);
920 
921 	/*
922 	 * Note: duplicate keys will be rejected by mod_hash.
923 	 */
924 	if (vresp->type != VIO_NET_RES_HYBRID) {
925 		rv = mod_hash_insert(vnetp->fdb_hashp, (mod_hash_key_t)addr,
926 		    (mod_hash_val_t)vresp);
927 		if (rv != 0) {
928 			DWARN(vnetp, "Duplicate macaddr key(%lx)\n", addr);
929 			return;
930 		}
931 	}
932 
933 	if (vresp->type == VIO_NET_RES_LDC_SERVICE) {
934 		/* Cache the fdb entry to vsw-port */
935 		WRITE_ENTER(&vnetp->vsw_fp_rw);
936 		if (vnetp->vsw_fp == NULL)
937 			vnetp->vsw_fp = vresp;
938 		RW_EXIT(&vnetp->vsw_fp_rw);
939 	} else if (vresp->type == VIO_NET_RES_HYBRID) {
940 		/* Cache the fdb entry to hybrid resource */
941 		WRITE_ENTER(&vnetp->vsw_fp_rw);
942 		if (vnetp->hio_fp == NULL)
943 			vnetp->hio_fp = vresp;
944 		RW_EXIT(&vnetp->vsw_fp_rw);
945 	}
946 }
947 
948 /*
949  * Remove an entry from fdb.
950  */
951 static void
952 vnet_fdbe_del(vnet_t *vnetp, vnet_res_t *vresp)
953 {
954 	uint64_t	addr = 0;
955 	int		rv;
956 	uint32_t	refcnt;
957 	vnet_res_t	*tmp;
958 
959 	KEY_HASH(addr, vresp->rem_macaddr);
960 
961 	/*
962 	 * Remove the entry from fdb hash table.
963 	 * This prevents further references to this fdb entry.
964 	 */
965 	if (vresp->type != VIO_NET_RES_HYBRID) {
966 		rv = mod_hash_remove(vnetp->fdb_hashp, (mod_hash_key_t)addr,
967 		    (mod_hash_val_t *)&tmp);
968 		if (rv != 0) {
969 			/*
970 			 * As the resources are added to the hash only
971 			 * after they are started, this can occur if
972 			 * a resource unregisters before it is ever started.
973 			 */
974 			return;
975 		}
976 	}
977 
978 	if (vresp->type == VIO_NET_RES_LDC_SERVICE) {
979 		WRITE_ENTER(&vnetp->vsw_fp_rw);
980 
981 		ASSERT(tmp == vnetp->vsw_fp);
982 		vnetp->vsw_fp = NULL;
983 
984 		RW_EXIT(&vnetp->vsw_fp_rw);
985 	} else if (vresp->type == VIO_NET_RES_HYBRID) {
986 		WRITE_ENTER(&vnetp->vsw_fp_rw);
987 
988 		vnetp->hio_fp = NULL;
989 
990 		RW_EXIT(&vnetp->vsw_fp_rw);
991 	}
992 
993 	/*
994 	 * If there are threads already ref holding before the entry was
995 	 * removed from hash table, then wait for ref count to drop to zero.
996 	 */
997 	(vresp->type == VIO_NET_RES_LDC_SERVICE) ?
998 	    (refcnt = 1) : (refcnt = 0);
999 	while (vresp->refcnt > refcnt) {
1000 		delay(drv_usectohz(vnet_fdbe_refcnt_delay));
1001 	}
1002 }
1003 
1004 /*
1005  * Search fdb for a given mac address. If an entry is found, hold
1006  * a reference to it and return the entry; else returns NULL.
1007  */
1008 static vnet_res_t *
1009 vnet_fdbe_find(vnet_t *vnetp, struct ether_addr *addrp)
1010 {
1011 	uint64_t	key = 0;
1012 	vnet_res_t	*vresp;
1013 	int		rv;
1014 
1015 	KEY_HASH(key, addrp->ether_addr_octet);
1016 
1017 	rv = mod_hash_find_cb(vnetp->fdb_hashp, (mod_hash_key_t)key,
1018 	    (mod_hash_val_t *)&vresp, vnet_fdbe_find_cb);
1019 
1020 	if (rv != 0)
1021 		return (NULL);
1022 
1023 	return (vresp);
1024 }
1025 
1026 /*
1027  * Callback function provided to mod_hash_find_cb(). After finding the fdb
1028  * entry corresponding to the key (macaddr), this callback will be invoked by
1029  * mod_hash_find_cb() to atomically increment the reference count on the fdb
1030  * entry before returning the found entry.
1031  */
1032 static void
1033 vnet_fdbe_find_cb(mod_hash_key_t key, mod_hash_val_t val)
1034 {
1035 	_NOTE(ARGUNUSED(key))
1036 	VNET_FDBE_REFHOLD((vnet_res_t *)val);
1037 }
1038 
1039 /*
1040  * Frames received that are tagged with the pvid of the vnet device must be
1041  * untagged before sending up the stack. This function walks the chain of rx
1042  * frames, untags any such frames and returns the updated chain.
1043  *
1044  * Arguments:
1045  *    pvid:  pvid of the vnet device for which packets are being received
1046  *    mp:    head of pkt chain to be validated and untagged
1047  *
1048  * Returns:
1049  *    mp:    head of updated chain of packets
1050  */
1051 static void
1052 vnet_rx_frames_untag(uint16_t pvid, mblk_t **mp)
1053 {
1054 	struct ether_vlan_header	*evhp;
1055 	mblk_t				*bp;
1056 	mblk_t				*bpt;
1057 	mblk_t				*bph;
1058 	mblk_t				*bpn;
1059 
1060 	bpn = bph = bpt = NULL;
1061 
1062 	for (bp = *mp; bp != NULL; bp = bpn) {
1063 
1064 		bpn = bp->b_next;
1065 		bp->b_next = bp->b_prev = NULL;
1066 
1067 		evhp = (struct ether_vlan_header *)bp->b_rptr;
1068 
1069 		if (ntohs(evhp->ether_tpid) == ETHERTYPE_VLAN &&
1070 		    VLAN_ID(ntohs(evhp->ether_tci)) == pvid) {
1071 
1072 			bp = vnet_vlan_remove_tag(bp);
1073 			if (bp == NULL) {
1074 				continue;
1075 			}
1076 
1077 		}
1078 
1079 		/* build a chain of processed packets */
1080 		if (bph == NULL) {
1081 			bph = bpt = bp;
1082 		} else {
1083 			bpt->b_next = bp;
1084 			bpt = bp;
1085 		}
1086 
1087 	}
1088 
1089 	*mp = bph;
1090 }
1091 
1092 static void
1093 vnet_rx(vio_net_handle_t vrh, mblk_t *mp)
1094 {
1095 	vnet_res_t	*vresp = (vnet_res_t *)vrh;
1096 	vnet_t		*vnetp = vresp->vnetp;
1097 
1098 	if ((vnetp == NULL) || (vnetp->mh == 0)) {
1099 		freemsgchain(mp);
1100 		return;
1101 	}
1102 
1103 	/*
1104 	 * Packets received over a hybrid resource need additional processing
1105 	 * to remove the tag, for the pvid case. The underlying resource is
1106 	 * not aware of the vnet's pvid and thus packets are received with the
1107 	 * vlan tag in the header; unlike packets that are received over a ldc
1108 	 * channel in which case the peer vnet/vsw would have already removed
1109 	 * the tag.
1110 	 */
1111 	if (vresp->type == VIO_NET_RES_HYBRID &&
1112 	    vnetp->pvid != vnetp->default_vlan_id) {
1113 
1114 		vnet_rx_frames_untag(vnetp->pvid, &mp);
1115 		if (mp == NULL) {
1116 			return;
1117 		}
1118 	}
1119 
1120 	mac_rx(vnetp->mh, NULL, mp);
1121 }
1122 
1123 void
1124 vnet_tx_update(vio_net_handle_t vrh)
1125 {
1126 	vnet_res_t *vresp = (vnet_res_t *)vrh;
1127 	vnet_t *vnetp = vresp->vnetp;
1128 
1129 	if ((vnetp != NULL) && (vnetp->mh != NULL)) {
1130 		mac_tx_update(vnetp->mh);
1131 	}
1132 }
1133 
1134 /*
1135  * Update the new mtu of vnet into the mac layer. First check if the device has
1136  * been plumbed and if so fail the mtu update. Returns 0 on success.
1137  */
1138 int
1139 vnet_mtu_update(vnet_t *vnetp, uint32_t mtu)
1140 {
1141 	int	rv;
1142 
1143 	if (vnetp == NULL || vnetp->mh == NULL) {
1144 		return (EINVAL);
1145 	}
1146 
1147 	WRITE_ENTER(&vnetp->vrwlock);
1148 
1149 	if (vnetp->flags & VNET_STARTED) {
1150 		RW_EXIT(&vnetp->vrwlock);
1151 		cmn_err(CE_NOTE, "!vnet%d: Unable to process mtu "
1152 		    "update as the device is plumbed\n",
1153 		    vnetp->instance);
1154 		return (EBUSY);
1155 	}
1156 
1157 	/* update mtu in the mac layer */
1158 	rv = mac_maxsdu_update(vnetp->mh, mtu);
1159 	if (rv != 0) {
1160 		RW_EXIT(&vnetp->vrwlock);
1161 		cmn_err(CE_NOTE,
1162 		    "!vnet%d: Unable to update mtu with mac layer\n",
1163 		    vnetp->instance);
1164 		return (EIO);
1165 	}
1166 
1167 	vnetp->mtu = mtu;
1168 
1169 	RW_EXIT(&vnetp->vrwlock);
1170 
1171 	return (0);
1172 }
1173 
1174 /*
1175  * vio_net_resource_reg -- An interface called to register a resource
1176  *	with vnet.
1177  *	macp -- a GLDv3 mac_register that has all the details of
1178  *		a resource and its callbacks etc.
1179  *	type -- resource type.
1180  *	local_macaddr -- resource's MAC address. This is used to
1181  *			 associate a resource with a corresponding vnet.
1182  *	remote_macaddr -- remote side MAC address. This is ignored for
1183  *			  the Hybrid resources.
1184  *	vhp -- A handle returned to the caller.
1185  *	vcb -- A set of callbacks provided to the callers.
1186  */
1187 int vio_net_resource_reg(mac_register_t *macp, vio_net_res_type_t type,
1188     ether_addr_t local_macaddr, ether_addr_t rem_macaddr, vio_net_handle_t *vhp,
1189     vio_net_callbacks_t *vcb)
1190 {
1191 	vnet_t	*vnetp;
1192 	vnet_res_t *vresp;
1193 
1194 	vresp = kmem_zalloc(sizeof (vnet_res_t), KM_SLEEP);
1195 	ether_copy(local_macaddr, vresp->local_macaddr);
1196 	ether_copy(rem_macaddr, vresp->rem_macaddr);
1197 	vresp->type = type;
1198 	bcopy(macp, &vresp->macreg, sizeof (mac_register_t));
1199 
1200 	DBG1(NULL, "Resource Registerig type=0%X\n", type);
1201 
1202 	READ_ENTER(&vnet_rw);
1203 	vnetp = vnet_headp;
1204 	while (vnetp != NULL) {
1205 		if (VNET_MATCH_RES(vresp, vnetp)) {
1206 			vresp->vnetp = vnetp;
1207 
1208 			/* Setup kstats for hio resource */
1209 			if (vresp->type == VIO_NET_RES_HYBRID) {
1210 				vresp->ksp = vnet_hio_setup_kstats(DRV_NAME,
1211 				    "hio", vresp);
1212 				if (vresp->ksp == NULL) {
1213 					cmn_err(CE_NOTE, "!vnet%d: Cannot "
1214 					    "create kstats for hio resource",
1215 					    vnetp->instance);
1216 				}
1217 			}
1218 
1219 			WRITE_ENTER(&vnetp->vrwlock);
1220 			vresp->nextp = vnetp->vres_list;
1221 			vnetp->vres_list = vresp;
1222 			RW_EXIT(&vnetp->vrwlock);
1223 			break;
1224 		}
1225 		vnetp = vnetp->nextp;
1226 	}
1227 	RW_EXIT(&vnet_rw);
1228 	if (vresp->vnetp == NULL) {
1229 		DWARN(NULL, "No vnet instance");
1230 		kmem_free(vresp, sizeof (vnet_res_t));
1231 		return (ENXIO);
1232 	}
1233 
1234 	*vhp = vresp;
1235 	vcb->vio_net_rx_cb = vnet_rx;
1236 	vcb->vio_net_tx_update = vnet_tx_update;
1237 	vcb->vio_net_report_err = vnet_handle_res_err;
1238 
1239 	/* Dispatch a task to start resources */
1240 	vnet_dispatch_res_task(vnetp);
1241 	return (0);
1242 }
1243 
1244 /*
1245  * vio_net_resource_unreg -- An interface to unregister a resource.
1246  */
1247 void
1248 vio_net_resource_unreg(vio_net_handle_t vhp)
1249 {
1250 	vnet_res_t *vresp = (vnet_res_t *)vhp;
1251 	vnet_t *vnetp = vresp->vnetp;
1252 	vnet_res_t *vrp;
1253 	kstat_t *ksp = NULL;
1254 
1255 	DBG1(NULL, "Resource Registerig hdl=0x%p", vhp);
1256 
1257 	ASSERT(vnetp != NULL);
1258 	vnet_fdbe_del(vnetp, vresp);
1259 
1260 	WRITE_ENTER(&vnetp->vrwlock);
1261 	if (vresp == vnetp->vres_list) {
1262 		vnetp->vres_list = vresp->nextp;
1263 	} else {
1264 		vrp = vnetp->vres_list;
1265 		while (vrp->nextp != NULL) {
1266 			if (vrp->nextp == vresp) {
1267 				vrp->nextp = vresp->nextp;
1268 				break;
1269 			}
1270 			vrp = vrp->nextp;
1271 		}
1272 	}
1273 
1274 	ksp = vresp->ksp;
1275 	vresp->ksp = NULL;
1276 
1277 	vresp->vnetp = NULL;
1278 	vresp->nextp = NULL;
1279 	RW_EXIT(&vnetp->vrwlock);
1280 	vnet_hio_destroy_kstats(ksp);
1281 	KMEM_FREE(vresp);
1282 }
1283 
1284 /*
1285  * vnet_dds_rx -- an interface called by vgen to DDS messages.
1286  */
1287 void
1288 vnet_dds_rx(void *arg, void *dmsg)
1289 {
1290 	vnet_t *vnetp = arg;
1291 	vdds_process_dds_msg(vnetp, dmsg);
1292 }
1293 
1294 /*
1295  * vnet_send_dds_msg -- An interface provided to DDS to send
1296  *	DDS messages. This simply sends meessages via vgen.
1297  */
1298 int
1299 vnet_send_dds_msg(vnet_t *vnetp, void *dmsg)
1300 {
1301 	int rv;
1302 
1303 	if (vnetp->vgenhdl != NULL) {
1304 		rv = vgen_dds_tx(vnetp->vgenhdl, dmsg);
1305 	}
1306 	return (rv);
1307 }
1308 
1309 /*
1310  * vnet_handle_res_err -- A callback function called by a resource
1311  *	to report an error. For example, vgen can call to report
1312  *	an LDC down/reset event. This will trigger cleanup of associated
1313  *	Hybrid resource.
1314  */
1315 /* ARGSUSED */
1316 static void
1317 vnet_handle_res_err(vio_net_handle_t vrh, vio_net_err_val_t err)
1318 {
1319 	vnet_res_t *vresp = (vnet_res_t *)vrh;
1320 	vnet_t *vnetp = vresp->vnetp;
1321 	int rv;
1322 
1323 	if (vnetp == NULL) {
1324 		return;
1325 	}
1326 	if ((vresp->type != VIO_NET_RES_LDC_SERVICE) &&
1327 	    (vresp->type != VIO_NET_RES_HYBRID)) {
1328 		return;
1329 	}
1330 	rv = ddi_taskq_dispatch(vnetp->taskqp, vdds_cleanup_hybrid_res,
1331 	    vnetp, DDI_NOSLEEP);
1332 	if (rv != DDI_SUCCESS) {
1333 		cmn_err(CE_WARN,
1334 		    "vnet%d:Failed to dispatch task to cleanup hybrid resource",
1335 		    vnetp->instance);
1336 	}
1337 }
1338 
1339 /*
1340  * vnet_dispatch_res_task -- A function to dispatch tasks start resources.
1341  */
1342 static void
1343 vnet_dispatch_res_task(vnet_t *vnetp)
1344 {
1345 	int rv;
1346 
1347 	WRITE_ENTER(&vnetp->vrwlock);
1348 	if (vnetp->flags & VNET_STARTED) {
1349 		rv = ddi_taskq_dispatch(vnetp->taskqp, vnet_res_start_task,
1350 		    vnetp, DDI_NOSLEEP);
1351 		if (rv != DDI_SUCCESS) {
1352 			cmn_err(CE_WARN,
1353 			    "vnet%d:Can't dispatch start resource task",
1354 			    vnetp->instance);
1355 		}
1356 	}
1357 	RW_EXIT(&vnetp->vrwlock);
1358 }
1359 
1360 /*
1361  * vnet_res_start_task -- A taskq callback function that starts a resource.
1362  */
1363 static void
1364 vnet_res_start_task(void *arg)
1365 {
1366 	vnet_t *vnetp = arg;
1367 
1368 	WRITE_ENTER(&vnetp->vrwlock);
1369 	if (vnetp->flags & VNET_STARTED) {
1370 		vnet_start_resources(vnetp);
1371 	}
1372 	RW_EXIT(&vnetp->vrwlock);
1373 }
1374 
1375 /*
1376  * vnet_start_resources -- starts all resources associated with
1377  *	a vnet.
1378  */
1379 static void
1380 vnet_start_resources(vnet_t *vnetp)
1381 {
1382 	mac_register_t	*macp;
1383 	mac_callbacks_t	*cbp;
1384 	vnet_res_t	*vresp;
1385 	int rv;
1386 
1387 	DBG1(vnetp, "enter\n");
1388 
1389 	for (vresp = vnetp->vres_list; vresp != NULL; vresp = vresp->nextp) {
1390 		/* skip if it is already started */
1391 		if (vresp->flags & VNET_STARTED) {
1392 			continue;
1393 		}
1394 		macp = &vresp->macreg;
1395 		cbp = macp->m_callbacks;
1396 		rv = cbp->mc_start(macp->m_driver);
1397 		if (rv == 0) {
1398 			/*
1399 			 * Successfully started the resource, so now
1400 			 * add it to the fdb.
1401 			 */
1402 			vresp->flags |= VNET_STARTED;
1403 			vnet_fdbe_add(vnetp, vresp);
1404 		}
1405 	}
1406 
1407 	DBG1(vnetp, "exit\n");
1408 
1409 }
1410 
1411 /*
1412  * vnet_stop_resources -- stop all resources associated with a vnet.
1413  */
1414 static void
1415 vnet_stop_resources(vnet_t *vnetp)
1416 {
1417 	vnet_res_t	*vresp;
1418 	vnet_res_t	*nvresp;
1419 	mac_register_t	*macp;
1420 	mac_callbacks_t	*cbp;
1421 
1422 	DBG1(vnetp, "enter\n");
1423 
1424 	for (vresp = vnetp->vres_list; vresp != NULL; ) {
1425 		nvresp = vresp->nextp;
1426 		if (vresp->flags & VNET_STARTED) {
1427 			macp = &vresp->macreg;
1428 			cbp = macp->m_callbacks;
1429 			cbp->mc_stop(macp->m_driver);
1430 			vresp->flags &= ~VNET_STARTED;
1431 		}
1432 		vresp = nvresp;
1433 	}
1434 	DBG1(vnetp, "exit\n");
1435 }
1436 
1437 /*
1438  * Setup kstats for the HIO statistics.
1439  * NOTE: the synchronization for the statistics is the
1440  * responsibility of the caller.
1441  */
1442 kstat_t *
1443 vnet_hio_setup_kstats(char *ks_mod, char *ks_name, vnet_res_t *vresp)
1444 {
1445 	kstat_t *ksp;
1446 	vnet_t *vnetp = vresp->vnetp;
1447 	vnet_hio_kstats_t *hiokp;
1448 	size_t size;
1449 
1450 	ASSERT(vnetp != NULL);
1451 	size = sizeof (vnet_hio_kstats_t) / sizeof (kstat_named_t);
1452 	ksp = kstat_create(ks_mod, vnetp->instance, ks_name, "net",
1453 	    KSTAT_TYPE_NAMED, size, 0);
1454 	if (ksp == NULL) {
1455 		return (NULL);
1456 	}
1457 
1458 	hiokp = (vnet_hio_kstats_t *)ksp->ks_data;
1459 	kstat_named_init(&hiokp->ipackets,		"ipackets",
1460 	    KSTAT_DATA_ULONG);
1461 	kstat_named_init(&hiokp->ierrors,		"ierrors",
1462 	    KSTAT_DATA_ULONG);
1463 	kstat_named_init(&hiokp->opackets,		"opackets",
1464 	    KSTAT_DATA_ULONG);
1465 	kstat_named_init(&hiokp->oerrors,		"oerrors",
1466 	    KSTAT_DATA_ULONG);
1467 
1468 
1469 	/* MIB II kstat variables */
1470 	kstat_named_init(&hiokp->rbytes,		"rbytes",
1471 	    KSTAT_DATA_ULONG);
1472 	kstat_named_init(&hiokp->obytes,		"obytes",
1473 	    KSTAT_DATA_ULONG);
1474 	kstat_named_init(&hiokp->multircv,		"multircv",
1475 	    KSTAT_DATA_ULONG);
1476 	kstat_named_init(&hiokp->multixmt,		"multixmt",
1477 	    KSTAT_DATA_ULONG);
1478 	kstat_named_init(&hiokp->brdcstrcv,		"brdcstrcv",
1479 	    KSTAT_DATA_ULONG);
1480 	kstat_named_init(&hiokp->brdcstxmt,		"brdcstxmt",
1481 	    KSTAT_DATA_ULONG);
1482 	kstat_named_init(&hiokp->norcvbuf,		"norcvbuf",
1483 	    KSTAT_DATA_ULONG);
1484 	kstat_named_init(&hiokp->noxmtbuf,		"noxmtbuf",
1485 	    KSTAT_DATA_ULONG);
1486 
1487 	ksp->ks_update = vnet_hio_update_kstats;
1488 	ksp->ks_private = (void *)vresp;
1489 	kstat_install(ksp);
1490 	return (ksp);
1491 }
1492 
1493 /*
1494  * Destroy kstats.
1495  */
1496 static void
1497 vnet_hio_destroy_kstats(kstat_t *ksp)
1498 {
1499 	if (ksp != NULL)
1500 		kstat_delete(ksp);
1501 }
1502 
1503 /*
1504  * Update the kstats.
1505  */
1506 static int
1507 vnet_hio_update_kstats(kstat_t *ksp, int rw)
1508 {
1509 	vnet_t *vnetp;
1510 	vnet_res_t *vresp;
1511 	vnet_hio_stats_t statsp;
1512 	vnet_hio_kstats_t *hiokp;
1513 
1514 	vresp = (vnet_res_t *)ksp->ks_private;
1515 	vnetp = vresp->vnetp;
1516 
1517 	bzero(&statsp, sizeof (vnet_hio_stats_t));
1518 
1519 	READ_ENTER(&vnetp->vsw_fp_rw);
1520 	if (vnetp->hio_fp == NULL) {
1521 		/* not using hio resources, just return */
1522 		RW_EXIT(&vnetp->vsw_fp_rw);
1523 		return (0);
1524 	}
1525 	VNET_FDBE_REFHOLD(vnetp->hio_fp);
1526 	RW_EXIT(&vnetp->vsw_fp_rw);
1527 	vnet_hio_get_stats(vnetp->hio_fp, &statsp);
1528 	VNET_FDBE_REFRELE(vnetp->hio_fp);
1529 
1530 	hiokp = (vnet_hio_kstats_t *)ksp->ks_data;
1531 
1532 	if (rw == KSTAT_READ) {
1533 		/* Link Input/Output stats */
1534 		hiokp->ipackets.value.ul	= (uint32_t)statsp.ipackets;
1535 		hiokp->ipackets64.value.ull	= statsp.ipackets;
1536 		hiokp->ierrors.value.ul		= statsp.ierrors;
1537 		hiokp->opackets.value.ul	= (uint32_t)statsp.opackets;
1538 		hiokp->opackets64.value.ull	= statsp.opackets;
1539 		hiokp->oerrors.value.ul		= statsp.oerrors;
1540 
1541 		/* MIB II kstat variables */
1542 		hiokp->rbytes.value.ul		= (uint32_t)statsp.rbytes;
1543 		hiokp->rbytes64.value.ull	= statsp.rbytes;
1544 		hiokp->obytes.value.ul		= (uint32_t)statsp.obytes;
1545 		hiokp->obytes64.value.ull	= statsp.obytes;
1546 		hiokp->multircv.value.ul	= statsp.multircv;
1547 		hiokp->multixmt.value.ul	= statsp.multixmt;
1548 		hiokp->brdcstrcv.value.ul	= statsp.brdcstrcv;
1549 		hiokp->brdcstxmt.value.ul	= statsp.brdcstxmt;
1550 		hiokp->norcvbuf.value.ul	= statsp.norcvbuf;
1551 		hiokp->noxmtbuf.value.ul	= statsp.noxmtbuf;
1552 	} else {
1553 		return (EACCES);
1554 	}
1555 
1556 	return (0);
1557 }
1558 
1559 static void
1560 vnet_hio_get_stats(vnet_res_t *vresp, vnet_hio_stats_t *statsp)
1561 {
1562 	mac_register_t		*macp;
1563 	mac_callbacks_t		*cbp;
1564 	uint64_t		val;
1565 	int			stat;
1566 
1567 	/*
1568 	 * get the specified statistics from the underlying nxge.
1569 	 */
1570 	macp = &vresp->macreg;
1571 	cbp = macp->m_callbacks;
1572 	for (stat = MAC_STAT_MIN; stat < MAC_STAT_OVERFLOWS; stat++) {
1573 		if (cbp->mc_getstat(macp->m_driver, stat, &val) == 0) {
1574 			switch (stat) {
1575 			case MAC_STAT_IPACKETS:
1576 				statsp->ipackets = val;
1577 				break;
1578 
1579 			case MAC_STAT_IERRORS:
1580 				statsp->ierrors = val;
1581 				break;
1582 
1583 			case MAC_STAT_OPACKETS:
1584 				statsp->opackets = val;
1585 				break;
1586 
1587 			case MAC_STAT_OERRORS:
1588 				statsp->oerrors = val;
1589 				break;
1590 
1591 			case MAC_STAT_RBYTES:
1592 				statsp->rbytes = val;
1593 				break;
1594 
1595 			case MAC_STAT_OBYTES:
1596 				statsp->obytes = val;
1597 				break;
1598 
1599 			case MAC_STAT_MULTIRCV:
1600 				statsp->multircv = val;
1601 				break;
1602 
1603 			case MAC_STAT_MULTIXMT:
1604 				statsp->multixmt = val;
1605 				break;
1606 
1607 			case MAC_STAT_BRDCSTRCV:
1608 				statsp->brdcstrcv = val;
1609 				break;
1610 
1611 			case MAC_STAT_BRDCSTXMT:
1612 				statsp->brdcstxmt = val;
1613 				break;
1614 
1615 			case MAC_STAT_NOXMTBUF:
1616 				statsp->noxmtbuf = val;
1617 				break;
1618 
1619 			case MAC_STAT_NORCVBUF:
1620 				statsp->norcvbuf = val;
1621 				break;
1622 
1623 			default:
1624 				/*
1625 				 * parameters not interested.
1626 				 */
1627 				break;
1628 			}
1629 		}
1630 	}
1631 }
1632