xref: /titanic_44/usr/src/uts/sun4v/io/vnet.c (revision 1f03f0496b37f42bc76df041144b1cf7e47fcda4)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #include <sys/types.h>
28 #include <sys/errno.h>
29 #include <sys/param.h>
30 #include <sys/stream.h>
31 #include <sys/kmem.h>
32 #include <sys/conf.h>
33 #include <sys/devops.h>
34 #include <sys/ksynch.h>
35 #include <sys/stat.h>
36 #include <sys/modctl.h>
37 #include <sys/modhash.h>
38 #include <sys/debug.h>
39 #include <sys/ethernet.h>
40 #include <sys/dlpi.h>
41 #include <net/if.h>
42 #include <sys/mac_provider.h>
43 #include <sys/mac_ether.h>
44 #include <sys/ddi.h>
45 #include <sys/sunddi.h>
46 #include <sys/strsun.h>
47 #include <sys/note.h>
48 #include <sys/atomic.h>
49 #include <sys/vnet.h>
50 #include <sys/vlan.h>
51 #include <sys/vnet_mailbox.h>
52 #include <sys/vnet_common.h>
53 #include <sys/dds.h>
54 #include <sys/strsubr.h>
55 #include <sys/taskq.h>
56 
57 /*
58  * Function prototypes.
59  */
60 
61 /* DDI entrypoints */
62 static int vnetdevinfo(dev_info_t *, ddi_info_cmd_t, void *, void **);
63 static int vnetattach(dev_info_t *, ddi_attach_cmd_t);
64 static int vnetdetach(dev_info_t *, ddi_detach_cmd_t);
65 
66 /* MAC entrypoints  */
67 static int vnet_m_stat(void *, uint_t, uint64_t *);
68 static int vnet_m_start(void *);
69 static void vnet_m_stop(void *);
70 static int vnet_m_promisc(void *, boolean_t);
71 static int vnet_m_multicst(void *, boolean_t, const uint8_t *);
72 static int vnet_m_unicst(void *, const uint8_t *);
73 mblk_t *vnet_m_tx(void *, mblk_t *);
74 static void vnet_m_ioctl(void *arg, queue_t *q, mblk_t *mp);
75 #ifdef	VNET_IOC_DEBUG
76 static void vnet_force_link_state(vnet_t *vnetp, queue_t *q, mblk_t *mp);
77 #endif
78 
79 /* vnet internal functions */
80 static int vnet_unattach(vnet_t *vnetp);
81 static int vnet_mac_register(vnet_t *);
82 static int vnet_read_mac_address(vnet_t *vnetp);
83 
84 /* Forwarding database (FDB) routines */
85 static void vnet_fdb_create(vnet_t *vnetp);
86 static void vnet_fdb_destroy(vnet_t *vnetp);
87 static vnet_res_t *vnet_fdbe_find(vnet_t *vnetp, struct ether_addr *addrp);
88 static void vnet_fdbe_find_cb(mod_hash_key_t key, mod_hash_val_t val);
89 void vnet_fdbe_add(vnet_t *vnetp, vnet_res_t *vresp);
90 static void vnet_fdbe_del(vnet_t *vnetp, vnet_res_t *vresp);
91 
92 static void vnet_rx_frames_untag(uint16_t pvid, mblk_t **mp);
93 static void vnet_rx(vio_net_handle_t vrh, mblk_t *mp);
94 static void vnet_tx_update(vio_net_handle_t vrh);
95 static void vnet_res_start_task(void *arg);
96 static void vnet_start_resources(vnet_t *vnetp);
97 static void vnet_stop_resources(vnet_t *vnetp);
98 static void vnet_dispatch_res_task(vnet_t *vnetp);
99 static void vnet_res_start_task(void *arg);
100 static void vnet_handle_res_err(vio_net_handle_t vrh, vio_net_err_val_t err);
101 
102 /* Exported to vnet_gen */
103 int vnet_mtu_update(vnet_t *vnetp, uint32_t mtu);
104 void vnet_link_update(vnet_t *vnetp, link_state_t link_state);
105 void vnet_dds_cleanup_hio(vnet_t *vnetp);
106 
107 static kstat_t *vnet_hio_setup_kstats(char *ks_mod, char *ks_name,
108     vnet_res_t *vresp);
109 static int vnet_hio_update_kstats(kstat_t *ksp, int rw);
110 static void vnet_hio_get_stats(vnet_res_t *vresp, vnet_hio_stats_t *statsp);
111 static void vnet_hio_destroy_kstats(kstat_t *ksp);
112 
113 /* Exported to vnet_dds */
114 int vnet_send_dds_msg(vnet_t *vnetp, void *dmsg);
115 
116 /* Externs that are imported from vnet_gen */
117 extern int vgen_init(void *vnetp, uint64_t regprop, dev_info_t *vnetdip,
118     const uint8_t *macaddr, void **vgenhdl);
119 extern void vgen_uninit(void *arg);
120 extern int vgen_dds_tx(void *arg, void *dmsg);
121 extern void vgen_mod_init(void);
122 extern int vgen_mod_cleanup(void);
123 extern void vgen_mod_fini(void);
124 
125 /* Externs that are imported from vnet_dds */
126 extern void vdds_mod_init(void);
127 extern void vdds_mod_fini(void);
128 extern int vdds_init(vnet_t *vnetp);
129 extern void vdds_cleanup(vnet_t *vnetp);
130 extern void vdds_process_dds_msg(vnet_t *vnetp, vio_dds_msg_t *dmsg);
131 extern void vdds_cleanup_hybrid_res(void *arg);
132 extern void vdds_cleanup_hio(vnet_t *vnetp);
133 
134 #define	DRV_NAME	"vnet"
/*
 * Take a reference on an fdb entry: atomically increment its refcnt and
 * assert that the counter is non-zero afterwards.
 *
 * The body is wrapped in do { } while (0) so that the macro together with
 * its trailing ';' forms exactly one statement; a bare { } block followed by
 * ';' would break when used as the body of an unbraced if/else.
 */
#define	VNET_FDBE_REFHOLD(p)						\
do {									\
	atomic_inc_32(&(p)->refcnt);					\
	ASSERT((p)->refcnt != 0);					\
} while (0)

/*
 * Drop a reference on an fdb entry: assert that at least one reference is
 * held and atomically decrement refcnt.
 */
#define	VNET_FDBE_REFRELE(p)						\
do {									\
	ASSERT((p)->refcnt != 0);					\
	atomic_dec_32(&(p)->refcnt);					\
} while (0)
146 
147 #ifdef	VNET_IOC_DEBUG
148 #define	VNET_M_CALLBACK_FLAGS	(MC_IOCTL)
149 #else
150 #define	VNET_M_CALLBACK_FLAGS	(0)
151 #endif
152 
153 static mac_callbacks_t vnet_m_callbacks = {
154 	VNET_M_CALLBACK_FLAGS,
155 	vnet_m_stat,
156 	vnet_m_start,
157 	vnet_m_stop,
158 	vnet_m_promisc,
159 	vnet_m_multicst,
160 	vnet_m_unicst,
161 	vnet_m_tx,
162 	vnet_m_ioctl,
163 	NULL,
164 	NULL
165 };
166 
167 /*
168  * Linked list of "vnet_t" structures - one per instance.
169  */
170 static vnet_t	*vnet_headp = NULL;
171 static krwlock_t vnet_rw;
172 
173 /* Tunables */
174 uint32_t vnet_ntxds = VNET_NTXDS;	/* power of 2 transmit descriptors */
175 uint32_t vnet_ldcwd_interval = VNET_LDCWD_INTERVAL; /* watchdog freq in msec */
176 uint32_t vnet_ldcwd_txtimeout = VNET_LDCWD_TXTIMEOUT;  /* tx timeout in msec */
177 uint32_t vnet_ldc_mtu = VNET_LDC_MTU;		/* ldc mtu */
178 
179 /*
180  * Set this to non-zero to enable additional internal receive buffer pools
181  * based on the MTU of the device for better performance at the cost of more
182  * memory consumption. This is turned off by default, to use allocb(9F) for
183  * receive buffer allocations of sizes > 2K.
184  */
185 boolean_t vnet_jumbo_rxpools = B_FALSE;
186 
187 /* # of chains in fdb hash table */
188 uint32_t	vnet_fdb_nchains = VNET_NFDB_HASH;
189 
190 /* Internal tunables */
191 uint32_t	vnet_ethermtu = 1500;	/* mtu of the device */
192 
193 /*
194  * Default vlan id. This is only used internally when the "default-vlan-id"
195  * property is not present in the MD device node. Therefore, this should not be
196  * used as a tunable; if this value is changed, the corresponding variable
197  * should be updated to the same value in vsw and also other vnets connected to
198  * the same vsw.
199  */
200 uint16_t	vnet_default_vlan_id = 1;
201 
202 /* delay in usec to wait for all references on a fdb entry to be dropped */
203 uint32_t vnet_fdbe_refcnt_delay = 10;
204 
205 static struct ether_addr etherbroadcastaddr = {
206 	0xff, 0xff, 0xff, 0xff, 0xff, 0xff
207 };
208 
209 
210 /*
211  * Property names
212  */
213 static char macaddr_propname[] = "local-mac-address";
214 
215 /*
216  * This is the string displayed by modinfo(1m).
217  */
218 static char vnet_ident[] = "vnet driver";
219 extern struct mod_ops mod_driverops;
220 static struct cb_ops cb_vnetops = {
221 	nulldev,		/* cb_open */
222 	nulldev,		/* cb_close */
223 	nodev,			/* cb_strategy */
224 	nodev,			/* cb_print */
225 	nodev,			/* cb_dump */
226 	nodev,			/* cb_read */
227 	nodev,			/* cb_write */
228 	nodev,			/* cb_ioctl */
229 	nodev,			/* cb_devmap */
230 	nodev,			/* cb_mmap */
231 	nodev,			/* cb_segmap */
232 	nochpoll,		/* cb_chpoll */
233 	ddi_prop_op,		/* cb_prop_op */
234 	NULL,			/* cb_stream */
235 	(int)(D_MP)		/* cb_flag */
236 };
237 
238 static struct dev_ops vnetops = {
239 	DEVO_REV,		/* devo_rev */
240 	0,			/* devo_refcnt */
241 	NULL,			/* devo_getinfo */
242 	nulldev,		/* devo_identify */
243 	nulldev,		/* devo_probe */
244 	vnetattach,		/* devo_attach */
245 	vnetdetach,		/* devo_detach */
246 	nodev,			/* devo_reset */
247 	&cb_vnetops,		/* devo_cb_ops */
248 	(struct bus_ops *)NULL,	/* devo_bus_ops */
249 	NULL,			/* devo_power */
250 	ddi_quiesce_not_supported,	/* devo_quiesce */
251 };
252 
253 static struct modldrv modldrv = {
254 	&mod_driverops,		/* Type of module.  This one is a driver */
255 	vnet_ident,		/* ID string */
256 	&vnetops		/* driver specific ops */
257 };
258 
259 static struct modlinkage modlinkage = {
260 	MODREV_1, (void *)&modldrv, NULL
261 };
262 
263 #ifdef DEBUG
264 
265 /*
266  * Print debug messages - set to 0xf to enable all msgs
267  */
268 int vnet_dbglevel = 0x8;
269 
270 static void
271 debug_printf(const char *fname, void *arg, const char *fmt, ...)
272 {
273 	char    buf[512];
274 	va_list ap;
275 	vnet_t *vnetp = (vnet_t *)arg;
276 	char    *bufp = buf;
277 
278 	if (vnetp == NULL) {
279 		(void) sprintf(bufp, "%s: ", fname);
280 		bufp += strlen(bufp);
281 	} else {
282 		(void) sprintf(bufp, "vnet%d:%s: ", vnetp->instance, fname);
283 		bufp += strlen(bufp);
284 	}
285 	va_start(ap, fmt);
286 	(void) vsprintf(bufp, fmt, ap);
287 	va_end(ap);
288 	cmn_err(CE_CONT, "%s\n", buf);
289 }
290 
291 #endif
292 
293 /* _init(9E): initialize the loadable module */
294 int
295 _init(void)
296 {
297 	int status;
298 
299 	DBG1(NULL, "enter\n");
300 
301 	mac_init_ops(&vnetops, "vnet");
302 	status = mod_install(&modlinkage);
303 	if (status != 0) {
304 		mac_fini_ops(&vnetops);
305 	}
306 	vdds_mod_init();
307 	vgen_mod_init();
308 	DBG1(NULL, "exit(%d)\n", status);
309 	return (status);
310 }
311 
312 /* _fini(9E): prepare the module for unloading. */
313 int
314 _fini(void)
315 {
316 	int		status;
317 
318 	DBG1(NULL, "enter\n");
319 
320 	status = vgen_mod_cleanup();
321 	if (status != 0)
322 		return (status);
323 
324 	status = mod_remove(&modlinkage);
325 	if (status != 0)
326 		return (status);
327 	mac_fini_ops(&vnetops);
328 	vgen_mod_fini();
329 	vdds_mod_fini();
330 
331 	DBG1(NULL, "exit(%d)\n", status);
332 	return (status);
333 }
334 
335 /* _info(9E): return information about the loadable module */
336 int
337 _info(struct modinfo *modinfop)
338 {
339 	return (mod_info(&modlinkage, modinfop));
340 }
341 
342 /*
343  * attach(9E): attach a device to the system.
344  * called once for each instance of the device on the system.
345  */
346 static int
347 vnetattach(dev_info_t *dip, ddi_attach_cmd_t cmd)
348 {
349 	vnet_t			*vnetp;
350 	int			status;
351 	int			instance;
352 	uint64_t		reg;
353 	char			qname[TASKQ_NAMELEN];
354 	vnet_attach_progress_t	attach_progress;
355 
356 	attach_progress = AST_init;
357 
358 	switch (cmd) {
359 	case DDI_ATTACH:
360 		break;
361 	case DDI_RESUME:
362 	case DDI_PM_RESUME:
363 	default:
364 		goto vnet_attach_fail;
365 	}
366 
367 	instance = ddi_get_instance(dip);
368 	DBG1(NULL, "instance(%d) enter\n", instance);
369 
370 	/* allocate vnet_t and mac_t structures */
371 	vnetp = kmem_zalloc(sizeof (vnet_t), KM_SLEEP);
372 	vnetp->dip = dip;
373 	vnetp->instance = instance;
374 	rw_init(&vnetp->vrwlock, NULL, RW_DRIVER, NULL);
375 	rw_init(&vnetp->vsw_fp_rw, NULL, RW_DRIVER, NULL);
376 	attach_progress |= AST_vnet_alloc;
377 
378 	status = vdds_init(vnetp);
379 	if (status != 0) {
380 		goto vnet_attach_fail;
381 	}
382 	attach_progress |= AST_vdds_init;
383 
384 	/* setup links to vnet_t from both devinfo and mac_t */
385 	ddi_set_driver_private(dip, (caddr_t)vnetp);
386 
387 	/* read the mac address */
388 	status = vnet_read_mac_address(vnetp);
389 	if (status != DDI_SUCCESS) {
390 		goto vnet_attach_fail;
391 	}
392 	attach_progress |= AST_read_macaddr;
393 
394 	reg = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
395 	    DDI_PROP_DONTPASS, "reg", -1);
396 	if (reg == -1) {
397 		goto vnet_attach_fail;
398 	}
399 	vnetp->reg = reg;
400 
401 	vnet_fdb_create(vnetp);
402 	attach_progress |= AST_fdbh_alloc;
403 
404 	(void) snprintf(qname, TASKQ_NAMELEN, "vnet_taskq%d", instance);
405 	if ((vnetp->taskqp = ddi_taskq_create(dip, qname, 1,
406 	    TASKQ_DEFAULTPRI, 0)) == NULL) {
407 		cmn_err(CE_WARN, "!vnet%d: Unable to create task queue",
408 		    instance);
409 		goto vnet_attach_fail;
410 	}
411 	attach_progress |= AST_taskq_create;
412 
413 	/* add to the list of vnet devices */
414 	WRITE_ENTER(&vnet_rw);
415 	vnetp->nextp = vnet_headp;
416 	vnet_headp = vnetp;
417 	RW_EXIT(&vnet_rw);
418 
419 	attach_progress |= AST_vnet_list;
420 
421 	/*
422 	 * Initialize the generic vnet plugin which provides
423 	 * communication via sun4v LDC (logical domain channel) based
424 	 * resources. It will register the LDC resources as and when
425 	 * they become available.
426 	 */
427 	status = vgen_init(vnetp, reg, vnetp->dip,
428 	    (uint8_t *)vnetp->curr_macaddr, &vnetp->vgenhdl);
429 	if (status != DDI_SUCCESS) {
430 		DERR(vnetp, "vgen_init() failed\n");
431 		goto vnet_attach_fail;
432 	}
433 	attach_progress |= AST_vgen_init;
434 
435 	/* register with MAC layer */
436 	status = vnet_mac_register(vnetp);
437 	if (status != DDI_SUCCESS) {
438 		goto vnet_attach_fail;
439 	}
440 	vnetp->link_state = LINK_STATE_UNKNOWN;
441 
442 	attach_progress |= AST_macreg;
443 
444 	vnetp->attach_progress = attach_progress;
445 
446 	DBG1(NULL, "instance(%d) exit\n", instance);
447 	return (DDI_SUCCESS);
448 
449 vnet_attach_fail:
450 	vnetp->attach_progress = attach_progress;
451 	status = vnet_unattach(vnetp);
452 	ASSERT(status == 0);
453 	return (DDI_FAILURE);
454 }
455 
456 /*
457  * detach(9E): detach a device from the system.
458  */
459 static int
460 vnetdetach(dev_info_t *dip, ddi_detach_cmd_t cmd)
461 {
462 	vnet_t		*vnetp;
463 	int		instance;
464 
465 	instance = ddi_get_instance(dip);
466 	DBG1(NULL, "instance(%d) enter\n", instance);
467 
468 	vnetp = ddi_get_driver_private(dip);
469 	if (vnetp == NULL) {
470 		goto vnet_detach_fail;
471 	}
472 
473 	switch (cmd) {
474 	case DDI_DETACH:
475 		break;
476 	case DDI_SUSPEND:
477 	case DDI_PM_SUSPEND:
478 	default:
479 		goto vnet_detach_fail;
480 	}
481 
482 	if (vnet_unattach(vnetp) != 0) {
483 		goto vnet_detach_fail;
484 	}
485 
486 	return (DDI_SUCCESS);
487 
488 vnet_detach_fail:
489 	return (DDI_FAILURE);
490 }
491 
492 /*
493  * Common routine to handle vnetattach() failure and vnetdetach(). Note that
494  * the only reason this function could fail is if mac_unregister() fails.
495  * Otherwise, this function must ensure that all resources are freed and return
496  * success.
497  */
498 static int
499 vnet_unattach(vnet_t *vnetp)
500 {
501 	vnet_attach_progress_t	attach_progress;
502 
503 	attach_progress = vnetp->attach_progress;
504 
505 	/*
506 	 * Unregister from the gldv3 subsystem. This can fail, in particular
507 	 * if there are still any open references to this mac device; in which
508 	 * case we just return failure without continuing to detach further.
509 	 */
510 	if (attach_progress & AST_macreg) {
511 		if (mac_unregister(vnetp->mh) != 0) {
512 			return (1);
513 		}
514 		attach_progress &= ~AST_macreg;
515 	}
516 
517 	/*
518 	 * Now that we have unregistered from gldv3, we must finish all other
519 	 * steps and successfully return from this function; otherwise we will
520 	 * end up leaving the device in a broken/unusable state.
521 	 *
522 	 * First, release any hybrid resources assigned to this vnet device.
523 	 */
524 	if (attach_progress & AST_vdds_init) {
525 		vdds_cleanup(vnetp);
526 		attach_progress &= ~AST_vdds_init;
527 	}
528 
529 	/*
530 	 * Uninit vgen. This stops further mdeg callbacks to this vnet
531 	 * device and/or its ports; and detaches any existing ports.
532 	 */
533 	if (attach_progress & AST_vgen_init) {
534 		vgen_uninit(vnetp->vgenhdl);
535 		attach_progress &= ~AST_vgen_init;
536 	}
537 
538 	/* Destroy the taskq. */
539 	if (attach_progress & AST_taskq_create) {
540 		ddi_taskq_destroy(vnetp->taskqp);
541 		attach_progress &= ~AST_taskq_create;
542 	}
543 
544 	/* Destroy fdb. */
545 	if (attach_progress & AST_fdbh_alloc) {
546 		vnet_fdb_destroy(vnetp);
547 		attach_progress &= ~AST_fdbh_alloc;
548 	}
549 
550 	/* Remove from the device list */
551 	if (attach_progress & AST_vnet_list) {
552 		vnet_t		**vnetpp;
553 		/* unlink from instance(vnet_t) list */
554 		WRITE_ENTER(&vnet_rw);
555 		for (vnetpp = &vnet_headp; *vnetpp;
556 		    vnetpp = &(*vnetpp)->nextp) {
557 			if (*vnetpp == vnetp) {
558 				*vnetpp = vnetp->nextp;
559 				break;
560 			}
561 		}
562 		RW_EXIT(&vnet_rw);
563 		attach_progress &= ~AST_vnet_list;
564 	}
565 
566 	if (attach_progress & AST_vnet_alloc) {
567 		rw_destroy(&vnetp->vrwlock);
568 		rw_destroy(&vnetp->vsw_fp_rw);
569 		attach_progress &= ~AST_vnet_list;
570 		KMEM_FREE(vnetp);
571 	}
572 
573 	return (0);
574 }
575 
576 /* enable the device for transmit/receive */
577 static int
578 vnet_m_start(void *arg)
579 {
580 	vnet_t		*vnetp = arg;
581 
582 	DBG1(vnetp, "enter\n");
583 
584 	WRITE_ENTER(&vnetp->vrwlock);
585 	vnetp->flags |= VNET_STARTED;
586 	vnet_start_resources(vnetp);
587 	RW_EXIT(&vnetp->vrwlock);
588 
589 	DBG1(vnetp, "exit\n");
590 	return (VNET_SUCCESS);
591 
592 }
593 
594 /* stop transmit/receive for the device */
595 static void
596 vnet_m_stop(void *arg)
597 {
598 	vnet_t		*vnetp = arg;
599 
600 	DBG1(vnetp, "enter\n");
601 
602 	WRITE_ENTER(&vnetp->vrwlock);
603 	if (vnetp->flags & VNET_STARTED) {
604 		vnet_stop_resources(vnetp);
605 		vnetp->flags &= ~VNET_STARTED;
606 	}
607 	RW_EXIT(&vnetp->vrwlock);
608 
609 	DBG1(vnetp, "exit\n");
610 }
611 
612 /* set the unicast mac address of the device */
613 static int
614 vnet_m_unicst(void *arg, const uint8_t *macaddr)
615 {
616 	_NOTE(ARGUNUSED(macaddr))
617 
618 	vnet_t *vnetp = arg;
619 
620 	DBG1(vnetp, "enter\n");
621 	/*
622 	 * NOTE: setting mac address dynamically is not supported.
623 	 */
624 	DBG1(vnetp, "exit\n");
625 
626 	return (VNET_FAILURE);
627 }
628 
629 /* enable/disable a multicast address */
630 static int
631 vnet_m_multicst(void *arg, boolean_t add, const uint8_t *mca)
632 {
633 	_NOTE(ARGUNUSED(add, mca))
634 
635 	vnet_t *vnetp = arg;
636 	vnet_res_t	*vresp;
637 	mac_register_t	*macp;
638 	mac_callbacks_t	*cbp;
639 	int rv = VNET_SUCCESS;
640 
641 	DBG1(vnetp, "enter\n");
642 
643 	READ_ENTER(&vnetp->vrwlock);
644 	for (vresp = vnetp->vres_list; vresp != NULL; vresp = vresp->nextp) {
645 		if (vresp->type == VIO_NET_RES_LDC_SERVICE) {
646 			macp = &vresp->macreg;
647 			cbp = macp->m_callbacks;
648 			rv = cbp->mc_multicst(macp->m_driver, add, mca);
649 		}
650 	}
651 	RW_EXIT(&vnetp->vrwlock);
652 
653 	DBG1(vnetp, "exit(%d)\n", rv);
654 	return (rv);
655 }
656 
657 /* set or clear promiscuous mode on the device */
658 static int
659 vnet_m_promisc(void *arg, boolean_t on)
660 {
661 	_NOTE(ARGUNUSED(on))
662 
663 	vnet_t *vnetp = arg;
664 	DBG1(vnetp, "enter\n");
665 	/*
666 	 * NOTE: setting promiscuous mode is not supported, just return success.
667 	 */
668 	DBG1(vnetp, "exit\n");
669 	return (VNET_SUCCESS);
670 }
671 
672 /*
673  * Transmit a chain of packets. This function provides switching functionality
674  * based on the destination mac address to reach other guests (within ldoms) or
675  * external hosts.
676  */
677 mblk_t *
678 vnet_m_tx(void *arg, mblk_t *mp)
679 {
680 	vnet_t			*vnetp;
681 	vnet_res_t		*vresp;
682 	mblk_t			*next;
683 	mblk_t			*resid_mp;
684 	mac_register_t		*macp;
685 	struct ether_header	*ehp;
686 	boolean_t		is_unicast;
687 	boolean_t		is_pvid;	/* non-default pvid ? */
688 	boolean_t		hres;		/* Hybrid resource ? */
689 
690 	vnetp = (vnet_t *)arg;
691 	DBG1(vnetp, "enter\n");
692 	ASSERT(mp != NULL);
693 
694 	is_pvid = (vnetp->pvid != vnetp->default_vlan_id) ? B_TRUE : B_FALSE;
695 
696 	while (mp != NULL) {
697 
698 		next = mp->b_next;
699 		mp->b_next = NULL;
700 
701 		/*
702 		 * Find fdb entry for the destination
703 		 * and hold a reference to it.
704 		 */
705 		ehp = (struct ether_header *)mp->b_rptr;
706 		vresp = vnet_fdbe_find(vnetp, &ehp->ether_dhost);
707 		if (vresp != NULL) {
708 
709 			/*
710 			 * Destination found in FDB.
711 			 * The destination is a vnet device within ldoms
712 			 * and directly reachable, invoke the tx function
713 			 * in the fdb entry.
714 			 */
715 			macp = &vresp->macreg;
716 			resid_mp = macp->m_callbacks->mc_tx(macp->m_driver, mp);
717 
718 			/* tx done; now release ref on fdb entry */
719 			VNET_FDBE_REFRELE(vresp);
720 
721 			if (resid_mp != NULL) {
722 				/* m_tx failed */
723 				mp->b_next = next;
724 				break;
725 			}
726 		} else {
727 			is_unicast = !(IS_BROADCAST(ehp) ||
728 			    (IS_MULTICAST(ehp)));
729 			/*
730 			 * Destination is not in FDB.
731 			 * If the destination is broadcast or multicast,
732 			 * then forward the packet to vswitch.
733 			 * If a Hybrid resource avilable, then send the
734 			 * unicast packet via hybrid resource, otherwise
735 			 * forward it to vswitch.
736 			 */
737 			READ_ENTER(&vnetp->vsw_fp_rw);
738 
739 			if ((is_unicast) && (vnetp->hio_fp != NULL)) {
740 				vresp = vnetp->hio_fp;
741 				hres = B_TRUE;
742 			} else {
743 				vresp = vnetp->vsw_fp;
744 				hres = B_FALSE;
745 			}
746 			if (vresp == NULL) {
747 				/*
748 				 * no fdb entry to vsw? drop the packet.
749 				 */
750 				RW_EXIT(&vnetp->vsw_fp_rw);
751 				freemsg(mp);
752 				mp = next;
753 				continue;
754 			}
755 
756 			/* ref hold the fdb entry to vsw */
757 			VNET_FDBE_REFHOLD(vresp);
758 
759 			RW_EXIT(&vnetp->vsw_fp_rw);
760 
761 			/*
762 			 * In the case of a hybrid resource we need to insert
763 			 * the tag for the pvid case here; unlike packets that
764 			 * are destined to a vnet/vsw in which case the vgen
765 			 * layer does the tagging before sending it over ldc.
766 			 */
767 			if (hres == B_TRUE) {
768 				/*
769 				 * Determine if the frame being transmitted
770 				 * over the hybrid resource is untagged. If so,
771 				 * insert the tag before transmitting.
772 				 */
773 				if (is_pvid == B_TRUE &&
774 				    ehp->ether_type != htons(ETHERTYPE_VLAN)) {
775 
776 					mp = vnet_vlan_insert_tag(mp,
777 					    vnetp->pvid);
778 					if (mp == NULL) {
779 						VNET_FDBE_REFRELE(vresp);
780 						mp = next;
781 						continue;
782 					}
783 
784 				}
785 			}
786 
787 			macp = &vresp->macreg;
788 			resid_mp = macp->m_callbacks->mc_tx(macp->m_driver, mp);
789 
790 			/* tx done; now release ref on fdb entry */
791 			VNET_FDBE_REFRELE(vresp);
792 
793 			if (resid_mp != NULL) {
794 				/* m_tx failed */
795 				mp->b_next = next;
796 				break;
797 			}
798 		}
799 
800 		mp = next;
801 	}
802 
803 	DBG1(vnetp, "exit\n");
804 	return (mp);
805 }
806 
807 /* get statistics from the device */
808 int
809 vnet_m_stat(void *arg, uint_t stat, uint64_t *val)
810 {
811 	vnet_t *vnetp = arg;
812 	vnet_res_t	*vresp;
813 	mac_register_t	*macp;
814 	mac_callbacks_t	*cbp;
815 	uint64_t val_total = 0;
816 
817 	DBG1(vnetp, "enter\n");
818 
819 	/*
820 	 * get the specified statistic from each transport and return the
821 	 * aggregate val.  This obviously only works for counters.
822 	 */
823 	if ((IS_MAC_STAT(stat) && !MAC_STAT_ISACOUNTER(stat)) ||
824 	    (IS_MACTYPE_STAT(stat) && !ETHER_STAT_ISACOUNTER(stat))) {
825 		return (ENOTSUP);
826 	}
827 
828 	READ_ENTER(&vnetp->vrwlock);
829 	for (vresp = vnetp->vres_list; vresp != NULL; vresp = vresp->nextp) {
830 		macp = &vresp->macreg;
831 		cbp = macp->m_callbacks;
832 		if (cbp->mc_getstat(macp->m_driver, stat, val) == 0)
833 			val_total += *val;
834 	}
835 	RW_EXIT(&vnetp->vrwlock);
836 
837 	*val = val_total;
838 
839 	DBG1(vnetp, "exit\n");
840 	return (0);
841 }
842 
843 /* wrapper function for mac_register() */
844 static int
845 vnet_mac_register(vnet_t *vnetp)
846 {
847 	mac_register_t	*macp;
848 	int		err;
849 
850 	if ((macp = mac_alloc(MAC_VERSION)) == NULL)
851 		return (DDI_FAILURE);
852 	macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
853 	macp->m_driver = vnetp;
854 	macp->m_dip = vnetp->dip;
855 	macp->m_src_addr = vnetp->curr_macaddr;
856 	macp->m_callbacks = &vnet_m_callbacks;
857 	macp->m_min_sdu = 0;
858 	macp->m_max_sdu = vnetp->mtu;
859 	macp->m_margin = VLAN_TAGSZ;
860 
861 	/*
862 	 * Finally, we're ready to register ourselves with the MAC layer
863 	 * interface; if this succeeds, we're all ready to start()
864 	 */
865 	err = mac_register(macp, &vnetp->mh);
866 	mac_free(macp);
867 	return (err == 0 ? DDI_SUCCESS : DDI_FAILURE);
868 }
869 
870 /* read the mac address of the device */
871 static int
872 vnet_read_mac_address(vnet_t *vnetp)
873 {
874 	uchar_t 	*macaddr;
875 	uint32_t 	size;
876 	int 		rv;
877 
878 	rv = ddi_prop_lookup_byte_array(DDI_DEV_T_ANY, vnetp->dip,
879 	    DDI_PROP_DONTPASS, macaddr_propname, &macaddr, &size);
880 	if ((rv != DDI_PROP_SUCCESS) || (size != ETHERADDRL)) {
881 		DWARN(vnetp, "prop_lookup failed(%s) err(%d)\n",
882 		    macaddr_propname, rv);
883 		return (DDI_FAILURE);
884 	}
885 	bcopy(macaddr, (caddr_t)vnetp->vendor_addr, ETHERADDRL);
886 	bcopy(macaddr, (caddr_t)vnetp->curr_macaddr, ETHERADDRL);
887 	ddi_prop_free(macaddr);
888 
889 	return (DDI_SUCCESS);
890 }
891 
892 static void
893 vnet_fdb_create(vnet_t *vnetp)
894 {
895 	char		hashname[MAXNAMELEN];
896 
897 	(void) snprintf(hashname, MAXNAMELEN, "vnet%d-fdbhash",
898 	    vnetp->instance);
899 	vnetp->fdb_nchains = vnet_fdb_nchains;
900 	vnetp->fdb_hashp = mod_hash_create_ptrhash(hashname, vnetp->fdb_nchains,
901 	    mod_hash_null_valdtor, sizeof (void *));
902 }
903 
904 static void
905 vnet_fdb_destroy(vnet_t *vnetp)
906 {
907 	/* destroy fdb-hash-table */
908 	if (vnetp->fdb_hashp != NULL) {
909 		mod_hash_destroy_hash(vnetp->fdb_hashp);
910 		vnetp->fdb_hashp = NULL;
911 		vnetp->fdb_nchains = 0;
912 	}
913 }
914 
915 /*
916  * Add an entry into the fdb.
917  */
918 void
919 vnet_fdbe_add(vnet_t *vnetp, vnet_res_t *vresp)
920 {
921 	uint64_t	addr = 0;
922 	int		rv;
923 
924 	KEY_HASH(addr, vresp->rem_macaddr);
925 
926 	/*
927 	 * If the entry being added corresponds to LDC_SERVICE resource,
928 	 * that is, vswitch connection, it is added to the hash and also
929 	 * the entry is cached, an additional reference count reflects
930 	 * this. The HYBRID resource is not added to the hash, but only
931 	 * cached, as it is only used for sending out packets for unknown
932 	 * unicast destinations.
933 	 */
934 	(vresp->type == VIO_NET_RES_LDC_SERVICE) ?
935 	    (vresp->refcnt = 1) : (vresp->refcnt = 0);
936 
937 	/*
938 	 * Note: duplicate keys will be rejected by mod_hash.
939 	 */
940 	if (vresp->type != VIO_NET_RES_HYBRID) {
941 		rv = mod_hash_insert(vnetp->fdb_hashp, (mod_hash_key_t)addr,
942 		    (mod_hash_val_t)vresp);
943 		if (rv != 0) {
944 			DWARN(vnetp, "Duplicate macaddr key(%lx)\n", addr);
945 			return;
946 		}
947 	}
948 
949 	if (vresp->type == VIO_NET_RES_LDC_SERVICE) {
950 		/* Cache the fdb entry to vsw-port */
951 		WRITE_ENTER(&vnetp->vsw_fp_rw);
952 		if (vnetp->vsw_fp == NULL)
953 			vnetp->vsw_fp = vresp;
954 		RW_EXIT(&vnetp->vsw_fp_rw);
955 	} else if (vresp->type == VIO_NET_RES_HYBRID) {
956 		/* Cache the fdb entry to hybrid resource */
957 		WRITE_ENTER(&vnetp->vsw_fp_rw);
958 		if (vnetp->hio_fp == NULL)
959 			vnetp->hio_fp = vresp;
960 		RW_EXIT(&vnetp->vsw_fp_rw);
961 	}
962 }
963 
964 /*
965  * Remove an entry from fdb.
966  */
967 static void
968 vnet_fdbe_del(vnet_t *vnetp, vnet_res_t *vresp)
969 {
970 	uint64_t	addr = 0;
971 	int		rv;
972 	uint32_t	refcnt;
973 	vnet_res_t	*tmp;
974 
975 	KEY_HASH(addr, vresp->rem_macaddr);
976 
977 	/*
978 	 * Remove the entry from fdb hash table.
979 	 * This prevents further references to this fdb entry.
980 	 */
981 	if (vresp->type != VIO_NET_RES_HYBRID) {
982 		rv = mod_hash_remove(vnetp->fdb_hashp, (mod_hash_key_t)addr,
983 		    (mod_hash_val_t *)&tmp);
984 		if (rv != 0) {
985 			/*
986 			 * As the resources are added to the hash only
987 			 * after they are started, this can occur if
988 			 * a resource unregisters before it is ever started.
989 			 */
990 			return;
991 		}
992 	}
993 
994 	if (vresp->type == VIO_NET_RES_LDC_SERVICE) {
995 		WRITE_ENTER(&vnetp->vsw_fp_rw);
996 
997 		ASSERT(tmp == vnetp->vsw_fp);
998 		vnetp->vsw_fp = NULL;
999 
1000 		RW_EXIT(&vnetp->vsw_fp_rw);
1001 	} else if (vresp->type == VIO_NET_RES_HYBRID) {
1002 		WRITE_ENTER(&vnetp->vsw_fp_rw);
1003 
1004 		vnetp->hio_fp = NULL;
1005 
1006 		RW_EXIT(&vnetp->vsw_fp_rw);
1007 	}
1008 
1009 	/*
1010 	 * If there are threads already ref holding before the entry was
1011 	 * removed from hash table, then wait for ref count to drop to zero.
1012 	 */
1013 	(vresp->type == VIO_NET_RES_LDC_SERVICE) ?
1014 	    (refcnt = 1) : (refcnt = 0);
1015 	while (vresp->refcnt > refcnt) {
1016 		delay(drv_usectohz(vnet_fdbe_refcnt_delay));
1017 	}
1018 }
1019 
1020 /*
1021  * Search fdb for a given mac address. If an entry is found, hold
1022  * a reference to it and return the entry; else returns NULL.
1023  */
1024 static vnet_res_t *
1025 vnet_fdbe_find(vnet_t *vnetp, struct ether_addr *addrp)
1026 {
1027 	uint64_t	key = 0;
1028 	vnet_res_t	*vresp;
1029 	int		rv;
1030 
1031 	KEY_HASH(key, addrp->ether_addr_octet);
1032 
1033 	rv = mod_hash_find_cb(vnetp->fdb_hashp, (mod_hash_key_t)key,
1034 	    (mod_hash_val_t *)&vresp, vnet_fdbe_find_cb);
1035 
1036 	if (rv != 0)
1037 		return (NULL);
1038 
1039 	return (vresp);
1040 }
1041 
1042 /*
1043  * Callback function provided to mod_hash_find_cb(). After finding the fdb
1044  * entry corresponding to the key (macaddr), this callback will be invoked by
1045  * mod_hash_find_cb() to atomically increment the reference count on the fdb
1046  * entry before returning the found entry.
1047  */
1048 static void
1049 vnet_fdbe_find_cb(mod_hash_key_t key, mod_hash_val_t val)
1050 {
1051 	_NOTE(ARGUNUSED(key))
1052 	VNET_FDBE_REFHOLD((vnet_res_t *)val);
1053 }
1054 
1055 /*
1056  * Frames received that are tagged with the pvid of the vnet device must be
1057  * untagged before sending up the stack. This function walks the chain of rx
1058  * frames, untags any such frames and returns the updated chain.
1059  *
1060  * Arguments:
1061  *    pvid:  pvid of the vnet device for which packets are being received
1062  *    mp:    head of pkt chain to be validated and untagged
1063  *
1064  * Returns:
1065  *    mp:    head of updated chain of packets
1066  */
1067 static void
1068 vnet_rx_frames_untag(uint16_t pvid, mblk_t **mp)
1069 {
1070 	struct ether_vlan_header	*evhp;
1071 	mblk_t				*bp;
1072 	mblk_t				*bpt;
1073 	mblk_t				*bph;
1074 	mblk_t				*bpn;
1075 
1076 	bpn = bph = bpt = NULL;
1077 
1078 	for (bp = *mp; bp != NULL; bp = bpn) {
1079 
1080 		bpn = bp->b_next;
1081 		bp->b_next = bp->b_prev = NULL;
1082 
1083 		evhp = (struct ether_vlan_header *)bp->b_rptr;
1084 
1085 		if (ntohs(evhp->ether_tpid) == ETHERTYPE_VLAN &&
1086 		    VLAN_ID(ntohs(evhp->ether_tci)) == pvid) {
1087 
1088 			bp = vnet_vlan_remove_tag(bp);
1089 			if (bp == NULL) {
1090 				continue;
1091 			}
1092 
1093 		}
1094 
1095 		/* build a chain of processed packets */
1096 		if (bph == NULL) {
1097 			bph = bpt = bp;
1098 		} else {
1099 			bpt->b_next = bp;
1100 			bpt = bp;
1101 		}
1102 
1103 	}
1104 
1105 	*mp = bph;
1106 }
1107 
1108 static void
1109 vnet_rx(vio_net_handle_t vrh, mblk_t *mp)
1110 {
1111 	vnet_res_t	*vresp = (vnet_res_t *)vrh;
1112 	vnet_t		*vnetp = vresp->vnetp;
1113 
1114 	if ((vnetp == NULL) || (vnetp->mh == 0)) {
1115 		freemsgchain(mp);
1116 		return;
1117 	}
1118 
1119 	/*
1120 	 * Packets received over a hybrid resource need additional processing
1121 	 * to remove the tag, for the pvid case. The underlying resource is
1122 	 * not aware of the vnet's pvid and thus packets are received with the
1123 	 * vlan tag in the header; unlike packets that are received over a ldc
1124 	 * channel in which case the peer vnet/vsw would have already removed
1125 	 * the tag.
1126 	 */
1127 	if (vresp->type == VIO_NET_RES_HYBRID &&
1128 	    vnetp->pvid != vnetp->default_vlan_id) {
1129 
1130 		vnet_rx_frames_untag(vnetp->pvid, &mp);
1131 		if (mp == NULL) {
1132 			return;
1133 		}
1134 	}
1135 
1136 	mac_rx(vnetp->mh, NULL, mp);
1137 }
1138 
1139 void
1140 vnet_tx_update(vio_net_handle_t vrh)
1141 {
1142 	vnet_res_t *vresp = (vnet_res_t *)vrh;
1143 	vnet_t *vnetp = vresp->vnetp;
1144 
1145 	if ((vnetp != NULL) && (vnetp->mh != NULL)) {
1146 		mac_tx_update(vnetp->mh);
1147 	}
1148 }
1149 
1150 /*
1151  * Update the new mtu of vnet into the mac layer. First check if the device has
1152  * been plumbed and if so fail the mtu update. Returns 0 on success.
1153  */
1154 int
1155 vnet_mtu_update(vnet_t *vnetp, uint32_t mtu)
1156 {
1157 	int	rv;
1158 
1159 	if (vnetp == NULL || vnetp->mh == NULL) {
1160 		return (EINVAL);
1161 	}
1162 
1163 	WRITE_ENTER(&vnetp->vrwlock);
1164 
1165 	if (vnetp->flags & VNET_STARTED) {
1166 		RW_EXIT(&vnetp->vrwlock);
1167 		cmn_err(CE_NOTE, "!vnet%d: Unable to process mtu "
1168 		    "update as the device is plumbed\n",
1169 		    vnetp->instance);
1170 		return (EBUSY);
1171 	}
1172 
1173 	/* update mtu in the mac layer */
1174 	rv = mac_maxsdu_update(vnetp->mh, mtu);
1175 	if (rv != 0) {
1176 		RW_EXIT(&vnetp->vrwlock);
1177 		cmn_err(CE_NOTE,
1178 		    "!vnet%d: Unable to update mtu with mac layer\n",
1179 		    vnetp->instance);
1180 		return (EIO);
1181 	}
1182 
1183 	vnetp->mtu = mtu;
1184 
1185 	RW_EXIT(&vnetp->vrwlock);
1186 
1187 	return (0);
1188 }
1189 
1190 /*
1191  * Update the link state of vnet to the mac layer.
1192  */
1193 void
1194 vnet_link_update(vnet_t *vnetp, link_state_t link_state)
1195 {
1196 	if (vnetp == NULL || vnetp->mh == NULL) {
1197 		return;
1198 	}
1199 
1200 	WRITE_ENTER(&vnetp->vrwlock);
1201 	if (vnetp->link_state == link_state) {
1202 		RW_EXIT(&vnetp->vrwlock);
1203 		return;
1204 	}
1205 	vnetp->link_state = link_state;
1206 	RW_EXIT(&vnetp->vrwlock);
1207 
1208 	mac_link_update(vnetp->mh, link_state);
1209 }
1210 
1211 /*
1212  * vio_net_resource_reg -- An interface called to register a resource
1213  *	with vnet.
1214  *	macp -- a GLDv3 mac_register that has all the details of
1215  *		a resource and its callbacks etc.
1216  *	type -- resource type.
1217  *	local_macaddr -- resource's MAC address. This is used to
1218  *			 associate a resource with a corresponding vnet.
1219  *	remote_macaddr -- remote side MAC address. This is ignored for
1220  *			  the Hybrid resources.
1221  *	vhp -- A handle returned to the caller.
1222  *	vcb -- A set of callbacks provided to the callers.
1223  */
1224 int vio_net_resource_reg(mac_register_t *macp, vio_net_res_type_t type,
1225     ether_addr_t local_macaddr, ether_addr_t rem_macaddr, vio_net_handle_t *vhp,
1226     vio_net_callbacks_t *vcb)
1227 {
1228 	vnet_t	*vnetp;
1229 	vnet_res_t *vresp;
1230 
1231 	vresp = kmem_zalloc(sizeof (vnet_res_t), KM_SLEEP);
1232 	ether_copy(local_macaddr, vresp->local_macaddr);
1233 	ether_copy(rem_macaddr, vresp->rem_macaddr);
1234 	vresp->type = type;
1235 	bcopy(macp, &vresp->macreg, sizeof (mac_register_t));
1236 
1237 	DBG1(NULL, "Resource Registerig type=0%X\n", type);
1238 
1239 	READ_ENTER(&vnet_rw);
1240 	vnetp = vnet_headp;
1241 	while (vnetp != NULL) {
1242 		if (VNET_MATCH_RES(vresp, vnetp)) {
1243 			vresp->vnetp = vnetp;
1244 
1245 			/* Setup kstats for hio resource */
1246 			if (vresp->type == VIO_NET_RES_HYBRID) {
1247 				vresp->ksp = vnet_hio_setup_kstats(DRV_NAME,
1248 				    "hio", vresp);
1249 				if (vresp->ksp == NULL) {
1250 					cmn_err(CE_NOTE, "!vnet%d: Cannot "
1251 					    "create kstats for hio resource",
1252 					    vnetp->instance);
1253 				}
1254 			}
1255 
1256 			WRITE_ENTER(&vnetp->vrwlock);
1257 			vresp->nextp = vnetp->vres_list;
1258 			vnetp->vres_list = vresp;
1259 			RW_EXIT(&vnetp->vrwlock);
1260 			break;
1261 		}
1262 		vnetp = vnetp->nextp;
1263 	}
1264 	RW_EXIT(&vnet_rw);
1265 	if (vresp->vnetp == NULL) {
1266 		DWARN(NULL, "No vnet instance");
1267 		kmem_free(vresp, sizeof (vnet_res_t));
1268 		return (ENXIO);
1269 	}
1270 
1271 	*vhp = vresp;
1272 	vcb->vio_net_rx_cb = vnet_rx;
1273 	vcb->vio_net_tx_update = vnet_tx_update;
1274 	vcb->vio_net_report_err = vnet_handle_res_err;
1275 
1276 	/* Dispatch a task to start resources */
1277 	vnet_dispatch_res_task(vnetp);
1278 	return (0);
1279 }
1280 
1281 /*
1282  * vio_net_resource_unreg -- An interface to unregister a resource.
1283  */
1284 void
1285 vio_net_resource_unreg(vio_net_handle_t vhp)
1286 {
1287 	vnet_res_t *vresp = (vnet_res_t *)vhp;
1288 	vnet_t *vnetp = vresp->vnetp;
1289 	vnet_res_t *vrp;
1290 	kstat_t *ksp = NULL;
1291 
1292 	DBG1(NULL, "Resource Registerig hdl=0x%p", vhp);
1293 
1294 	ASSERT(vnetp != NULL);
1295 	vnet_fdbe_del(vnetp, vresp);
1296 
1297 	WRITE_ENTER(&vnetp->vrwlock);
1298 	if (vresp == vnetp->vres_list) {
1299 		vnetp->vres_list = vresp->nextp;
1300 	} else {
1301 		vrp = vnetp->vres_list;
1302 		while (vrp->nextp != NULL) {
1303 			if (vrp->nextp == vresp) {
1304 				vrp->nextp = vresp->nextp;
1305 				break;
1306 			}
1307 			vrp = vrp->nextp;
1308 		}
1309 	}
1310 
1311 	ksp = vresp->ksp;
1312 	vresp->ksp = NULL;
1313 
1314 	vresp->vnetp = NULL;
1315 	vresp->nextp = NULL;
1316 	RW_EXIT(&vnetp->vrwlock);
1317 	vnet_hio_destroy_kstats(ksp);
1318 	KMEM_FREE(vresp);
1319 }
1320 
1321 /*
1322  * vnet_dds_rx -- an interface called by vgen to DDS messages.
1323  */
1324 void
1325 vnet_dds_rx(void *arg, void *dmsg)
1326 {
1327 	vnet_t *vnetp = arg;
1328 	vdds_process_dds_msg(vnetp, dmsg);
1329 }
1330 
1331 /*
1332  * vnet_send_dds_msg -- An interface provided to DDS to send
1333  *	DDS messages. This simply sends meessages via vgen.
1334  */
1335 int
1336 vnet_send_dds_msg(vnet_t *vnetp, void *dmsg)
1337 {
1338 	int rv;
1339 
1340 	if (vnetp->vgenhdl != NULL) {
1341 		rv = vgen_dds_tx(vnetp->vgenhdl, dmsg);
1342 	}
1343 	return (rv);
1344 }
1345 
1346 /*
1347  * vnet_cleanup_hio -- an interface called by vgen to cleanup hio resources.
1348  */
1349 void
1350 vnet_dds_cleanup_hio(vnet_t *vnetp)
1351 {
1352 	vdds_cleanup_hio(vnetp);
1353 }
1354 
1355 /*
1356  * vnet_handle_res_err -- A callback function called by a resource
1357  *	to report an error. For example, vgen can call to report
1358  *	an LDC down/reset event. This will trigger cleanup of associated
1359  *	Hybrid resource.
1360  */
1361 /* ARGSUSED */
1362 static void
1363 vnet_handle_res_err(vio_net_handle_t vrh, vio_net_err_val_t err)
1364 {
1365 	vnet_res_t *vresp = (vnet_res_t *)vrh;
1366 	vnet_t *vnetp = vresp->vnetp;
1367 
1368 	if (vnetp == NULL) {
1369 		return;
1370 	}
1371 	if ((vresp->type != VIO_NET_RES_LDC_SERVICE) &&
1372 	    (vresp->type != VIO_NET_RES_HYBRID)) {
1373 		return;
1374 	}
1375 
1376 	vdds_cleanup_hio(vnetp);
1377 }
1378 
1379 /*
1380  * vnet_dispatch_res_task -- A function to dispatch tasks start resources.
1381  */
1382 static void
1383 vnet_dispatch_res_task(vnet_t *vnetp)
1384 {
1385 	int rv;
1386 
1387 	/*
1388 	 * Dispatch the task. It could be the case that vnetp->flags does
1389 	 * not have VNET_STARTED set. This is ok as vnet_rest_start_task()
1390 	 * can abort the task when the task is started.
1391 	 */
1392 	rv = ddi_taskq_dispatch(vnetp->taskqp, vnet_res_start_task,
1393 	    vnetp, DDI_NOSLEEP);
1394 	if (rv != DDI_SUCCESS) {
1395 		cmn_err(CE_WARN,
1396 		    "vnet%d:Can't dispatch start resource task",
1397 		    vnetp->instance);
1398 	}
1399 }
1400 
1401 /*
1402  * vnet_res_start_task -- A taskq callback function that starts a resource.
1403  */
1404 static void
1405 vnet_res_start_task(void *arg)
1406 {
1407 	vnet_t *vnetp = arg;
1408 
1409 	WRITE_ENTER(&vnetp->vrwlock);
1410 	if (vnetp->flags & VNET_STARTED) {
1411 		vnet_start_resources(vnetp);
1412 	}
1413 	RW_EXIT(&vnetp->vrwlock);
1414 }
1415 
1416 /*
1417  * vnet_start_resources -- starts all resources associated with
1418  *	a vnet.
1419  */
1420 static void
1421 vnet_start_resources(vnet_t *vnetp)
1422 {
1423 	mac_register_t	*macp;
1424 	mac_callbacks_t	*cbp;
1425 	vnet_res_t	*vresp;
1426 	int rv;
1427 
1428 	DBG1(vnetp, "enter\n");
1429 
1430 	for (vresp = vnetp->vres_list; vresp != NULL; vresp = vresp->nextp) {
1431 		/* skip if it is already started */
1432 		if (vresp->flags & VNET_STARTED) {
1433 			continue;
1434 		}
1435 		macp = &vresp->macreg;
1436 		cbp = macp->m_callbacks;
1437 		rv = cbp->mc_start(macp->m_driver);
1438 		if (rv == 0) {
1439 			/*
1440 			 * Successfully started the resource, so now
1441 			 * add it to the fdb.
1442 			 */
1443 			vresp->flags |= VNET_STARTED;
1444 			vnet_fdbe_add(vnetp, vresp);
1445 		}
1446 	}
1447 
1448 	DBG1(vnetp, "exit\n");
1449 
1450 }
1451 
1452 /*
1453  * vnet_stop_resources -- stop all resources associated with a vnet.
1454  */
1455 static void
1456 vnet_stop_resources(vnet_t *vnetp)
1457 {
1458 	vnet_res_t	*vresp;
1459 	vnet_res_t	*nvresp;
1460 	mac_register_t	*macp;
1461 	mac_callbacks_t	*cbp;
1462 
1463 	DBG1(vnetp, "enter\n");
1464 
1465 	for (vresp = vnetp->vres_list; vresp != NULL; ) {
1466 		nvresp = vresp->nextp;
1467 		if (vresp->flags & VNET_STARTED) {
1468 			macp = &vresp->macreg;
1469 			cbp = macp->m_callbacks;
1470 			cbp->mc_stop(macp->m_driver);
1471 			vresp->flags &= ~VNET_STARTED;
1472 		}
1473 		vresp = nvresp;
1474 	}
1475 	DBG1(vnetp, "exit\n");
1476 }
1477 
1478 /*
1479  * Setup kstats for the HIO statistics.
1480  * NOTE: the synchronization for the statistics is the
1481  * responsibility of the caller.
1482  */
1483 kstat_t *
1484 vnet_hio_setup_kstats(char *ks_mod, char *ks_name, vnet_res_t *vresp)
1485 {
1486 	kstat_t *ksp;
1487 	vnet_t *vnetp = vresp->vnetp;
1488 	vnet_hio_kstats_t *hiokp;
1489 	size_t size;
1490 
1491 	ASSERT(vnetp != NULL);
1492 	size = sizeof (vnet_hio_kstats_t) / sizeof (kstat_named_t);
1493 	ksp = kstat_create(ks_mod, vnetp->instance, ks_name, "net",
1494 	    KSTAT_TYPE_NAMED, size, 0);
1495 	if (ksp == NULL) {
1496 		return (NULL);
1497 	}
1498 
1499 	hiokp = (vnet_hio_kstats_t *)ksp->ks_data;
1500 	kstat_named_init(&hiokp->ipackets,		"ipackets",
1501 	    KSTAT_DATA_ULONG);
1502 	kstat_named_init(&hiokp->ierrors,		"ierrors",
1503 	    KSTAT_DATA_ULONG);
1504 	kstat_named_init(&hiokp->opackets,		"opackets",
1505 	    KSTAT_DATA_ULONG);
1506 	kstat_named_init(&hiokp->oerrors,		"oerrors",
1507 	    KSTAT_DATA_ULONG);
1508 
1509 
1510 	/* MIB II kstat variables */
1511 	kstat_named_init(&hiokp->rbytes,		"rbytes",
1512 	    KSTAT_DATA_ULONG);
1513 	kstat_named_init(&hiokp->obytes,		"obytes",
1514 	    KSTAT_DATA_ULONG);
1515 	kstat_named_init(&hiokp->multircv,		"multircv",
1516 	    KSTAT_DATA_ULONG);
1517 	kstat_named_init(&hiokp->multixmt,		"multixmt",
1518 	    KSTAT_DATA_ULONG);
1519 	kstat_named_init(&hiokp->brdcstrcv,		"brdcstrcv",
1520 	    KSTAT_DATA_ULONG);
1521 	kstat_named_init(&hiokp->brdcstxmt,		"brdcstxmt",
1522 	    KSTAT_DATA_ULONG);
1523 	kstat_named_init(&hiokp->norcvbuf,		"norcvbuf",
1524 	    KSTAT_DATA_ULONG);
1525 	kstat_named_init(&hiokp->noxmtbuf,		"noxmtbuf",
1526 	    KSTAT_DATA_ULONG);
1527 
1528 	ksp->ks_update = vnet_hio_update_kstats;
1529 	ksp->ks_private = (void *)vresp;
1530 	kstat_install(ksp);
1531 	return (ksp);
1532 }
1533 
1534 /*
1535  * Destroy kstats.
1536  */
1537 static void
1538 vnet_hio_destroy_kstats(kstat_t *ksp)
1539 {
1540 	if (ksp != NULL)
1541 		kstat_delete(ksp);
1542 }
1543 
1544 /*
1545  * Update the kstats.
1546  */
1547 static int
1548 vnet_hio_update_kstats(kstat_t *ksp, int rw)
1549 {
1550 	vnet_t *vnetp;
1551 	vnet_res_t *vresp;
1552 	vnet_hio_stats_t statsp;
1553 	vnet_hio_kstats_t *hiokp;
1554 
1555 	vresp = (vnet_res_t *)ksp->ks_private;
1556 	vnetp = vresp->vnetp;
1557 
1558 	bzero(&statsp, sizeof (vnet_hio_stats_t));
1559 
1560 	READ_ENTER(&vnetp->vsw_fp_rw);
1561 	if (vnetp->hio_fp == NULL) {
1562 		/* not using hio resources, just return */
1563 		RW_EXIT(&vnetp->vsw_fp_rw);
1564 		return (0);
1565 	}
1566 	VNET_FDBE_REFHOLD(vnetp->hio_fp);
1567 	RW_EXIT(&vnetp->vsw_fp_rw);
1568 	vnet_hio_get_stats(vnetp->hio_fp, &statsp);
1569 	VNET_FDBE_REFRELE(vnetp->hio_fp);
1570 
1571 	hiokp = (vnet_hio_kstats_t *)ksp->ks_data;
1572 
1573 	if (rw == KSTAT_READ) {
1574 		/* Link Input/Output stats */
1575 		hiokp->ipackets.value.ul	= (uint32_t)statsp.ipackets;
1576 		hiokp->ipackets64.value.ull	= statsp.ipackets;
1577 		hiokp->ierrors.value.ul		= statsp.ierrors;
1578 		hiokp->opackets.value.ul	= (uint32_t)statsp.opackets;
1579 		hiokp->opackets64.value.ull	= statsp.opackets;
1580 		hiokp->oerrors.value.ul		= statsp.oerrors;
1581 
1582 		/* MIB II kstat variables */
1583 		hiokp->rbytes.value.ul		= (uint32_t)statsp.rbytes;
1584 		hiokp->rbytes64.value.ull	= statsp.rbytes;
1585 		hiokp->obytes.value.ul		= (uint32_t)statsp.obytes;
1586 		hiokp->obytes64.value.ull	= statsp.obytes;
1587 		hiokp->multircv.value.ul	= statsp.multircv;
1588 		hiokp->multixmt.value.ul	= statsp.multixmt;
1589 		hiokp->brdcstrcv.value.ul	= statsp.brdcstrcv;
1590 		hiokp->brdcstxmt.value.ul	= statsp.brdcstxmt;
1591 		hiokp->norcvbuf.value.ul	= statsp.norcvbuf;
1592 		hiokp->noxmtbuf.value.ul	= statsp.noxmtbuf;
1593 	} else {
1594 		return (EACCES);
1595 	}
1596 
1597 	return (0);
1598 }
1599 
1600 static void
1601 vnet_hio_get_stats(vnet_res_t *vresp, vnet_hio_stats_t *statsp)
1602 {
1603 	mac_register_t		*macp;
1604 	mac_callbacks_t		*cbp;
1605 	uint64_t		val;
1606 	int			stat;
1607 
1608 	/*
1609 	 * get the specified statistics from the underlying nxge.
1610 	 */
1611 	macp = &vresp->macreg;
1612 	cbp = macp->m_callbacks;
1613 	for (stat = MAC_STAT_MIN; stat < MAC_STAT_OVERFLOWS; stat++) {
1614 		if (cbp->mc_getstat(macp->m_driver, stat, &val) == 0) {
1615 			switch (stat) {
1616 			case MAC_STAT_IPACKETS:
1617 				statsp->ipackets = val;
1618 				break;
1619 
1620 			case MAC_STAT_IERRORS:
1621 				statsp->ierrors = val;
1622 				break;
1623 
1624 			case MAC_STAT_OPACKETS:
1625 				statsp->opackets = val;
1626 				break;
1627 
1628 			case MAC_STAT_OERRORS:
1629 				statsp->oerrors = val;
1630 				break;
1631 
1632 			case MAC_STAT_RBYTES:
1633 				statsp->rbytes = val;
1634 				break;
1635 
1636 			case MAC_STAT_OBYTES:
1637 				statsp->obytes = val;
1638 				break;
1639 
1640 			case MAC_STAT_MULTIRCV:
1641 				statsp->multircv = val;
1642 				break;
1643 
1644 			case MAC_STAT_MULTIXMT:
1645 				statsp->multixmt = val;
1646 				break;
1647 
1648 			case MAC_STAT_BRDCSTRCV:
1649 				statsp->brdcstrcv = val;
1650 				break;
1651 
1652 			case MAC_STAT_BRDCSTXMT:
1653 				statsp->brdcstxmt = val;
1654 				break;
1655 
1656 			case MAC_STAT_NOXMTBUF:
1657 				statsp->noxmtbuf = val;
1658 				break;
1659 
1660 			case MAC_STAT_NORCVBUF:
1661 				statsp->norcvbuf = val;
1662 				break;
1663 
1664 			default:
1665 				/*
1666 				 * parameters not interested.
1667 				 */
1668 				break;
1669 			}
1670 		}
1671 	}
1672 }
1673 
1674 #ifdef	VNET_IOC_DEBUG
1675 
1676 /*
1677  * The ioctl entry point is used only for debugging for now. The ioctl commands
1678  * can be used to force the link state of the channel connected to vsw.
1679  */
1680 static void
1681 vnet_m_ioctl(void *arg, queue_t *q, mblk_t *mp)
1682 {
1683 	struct iocblk	*iocp;
1684 	vnet_t		*vnetp;
1685 
1686 	iocp = (struct iocblk *)(uintptr_t)mp->b_rptr;
1687 	iocp->ioc_error = 0;
1688 	vnetp = (vnet_t *)arg;
1689 
1690 	if (vnetp == NULL) {
1691 		miocnak(q, mp, 0, EINVAL);
1692 		return;
1693 	}
1694 
1695 	switch (iocp->ioc_cmd) {
1696 
1697 	case VNET_FORCE_LINK_DOWN:
1698 	case VNET_FORCE_LINK_UP:
1699 		vnet_force_link_state(vnetp, q, mp);
1700 		break;
1701 
1702 	default:
1703 		iocp->ioc_error = EINVAL;
1704 		miocnak(q, mp, 0, iocp->ioc_error);
1705 		break;
1706 
1707 	}
1708 }
1709 
1710 static void
1711 vnet_force_link_state(vnet_t *vnetp, queue_t *q, mblk_t *mp)
1712 {
1713 	mac_register_t	*macp;
1714 	mac_callbacks_t	*cbp;
1715 	vnet_res_t	*vresp;
1716 
1717 	READ_ENTER(&vnetp->vsw_fp_rw);
1718 
1719 	vresp = vnetp->vsw_fp;
1720 	if (vresp == NULL) {
1721 		RW_EXIT(&vnetp->vsw_fp_rw);
1722 		return;
1723 	}
1724 
1725 	macp = &vresp->macreg;
1726 	cbp = macp->m_callbacks;
1727 	cbp->mc_ioctl(macp->m_driver, q, mp);
1728 
1729 	RW_EXIT(&vnetp->vsw_fp_rw);
1730 }
1731 
1732 #else
1733 
1734 static void
1735 vnet_m_ioctl(void *arg, queue_t *q, mblk_t *mp)
1736 {
1737 	vnet_t		*vnetp;
1738 
1739 	vnetp = (vnet_t *)arg;
1740 
1741 	if (vnetp == NULL) {
1742 		miocnak(q, mp, 0, EINVAL);
1743 		return;
1744 	}
1745 
1746 	/* ioctl support only for debugging */
1747 	miocnak(q, mp, 0, ENOTSUP);
1748 }
1749 
1750 #endif
1751