xref: /illumos-gate/usr/src/uts/sun4v/io/vnet.c (revision 07a48826732249fcd3aa8dd53c8389595e9f1fbc)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #include <sys/types.h>
28 #include <sys/errno.h>
29 #include <sys/param.h>
30 #include <sys/stream.h>
31 #include <sys/kmem.h>
32 #include <sys/conf.h>
33 #include <sys/devops.h>
34 #include <sys/ksynch.h>
35 #include <sys/stat.h>
36 #include <sys/modctl.h>
37 #include <sys/modhash.h>
38 #include <sys/debug.h>
39 #include <sys/ethernet.h>
40 #include <sys/dlpi.h>
41 #include <net/if.h>
42 #include <sys/mac_provider.h>
43 #include <sys/mac_ether.h>
44 #include <sys/ddi.h>
45 #include <sys/sunddi.h>
46 #include <sys/strsun.h>
47 #include <sys/note.h>
48 #include <sys/atomic.h>
49 #include <sys/vnet.h>
50 #include <sys/vlan.h>
51 #include <sys/vnet_mailbox.h>
52 #include <sys/vnet_common.h>
53 #include <sys/dds.h>
54 #include <sys/strsubr.h>
55 #include <sys/taskq.h>
56 
57 /*
58  * Function prototypes.
59  */
60 
61 /* DDI entrypoints */
62 static int vnetdevinfo(dev_info_t *, ddi_info_cmd_t, void *, void **);
63 static int vnetattach(dev_info_t *, ddi_attach_cmd_t);
64 static int vnetdetach(dev_info_t *, ddi_detach_cmd_t);
65 
66 /* MAC entrypoints  */
67 static int vnet_m_stat(void *, uint_t, uint64_t *);
68 static int vnet_m_start(void *);
69 static void vnet_m_stop(void *);
70 static int vnet_m_promisc(void *, boolean_t);
71 static int vnet_m_multicst(void *, boolean_t, const uint8_t *);
72 static int vnet_m_unicst(void *, const uint8_t *);
73 mblk_t *vnet_m_tx(void *, mblk_t *);
74 static void vnet_m_ioctl(void *arg, queue_t *q, mblk_t *mp);
75 #ifdef	VNET_IOC_DEBUG
76 static void vnet_force_link_state(vnet_t *vnetp, queue_t *q, mblk_t *mp);
77 #endif
78 
79 /* vnet internal functions */
80 static int vnet_unattach(vnet_t *vnetp);
81 static int vnet_mac_register(vnet_t *);
82 static int vnet_read_mac_address(vnet_t *vnetp);
83 
84 /* Forwarding database (FDB) routines */
85 static void vnet_fdb_create(vnet_t *vnetp);
86 static void vnet_fdb_destroy(vnet_t *vnetp);
87 static vnet_res_t *vnet_fdbe_find(vnet_t *vnetp, struct ether_addr *addrp);
88 static void vnet_fdbe_find_cb(mod_hash_key_t key, mod_hash_val_t val);
89 void vnet_fdbe_add(vnet_t *vnetp, vnet_res_t *vresp);
90 static void vnet_fdbe_del(vnet_t *vnetp, vnet_res_t *vresp);
91 
92 static void vnet_rx_frames_untag(uint16_t pvid, mblk_t **mp);
93 static void vnet_rx(vio_net_handle_t vrh, mblk_t *mp);
94 static void vnet_tx_update(vio_net_handle_t vrh);
95 static void vnet_res_start_task(void *arg);
96 static void vnet_start_resources(vnet_t *vnetp);
97 static void vnet_stop_resources(vnet_t *vnetp);
98 static void vnet_dispatch_res_task(vnet_t *vnetp);
99 static void vnet_res_start_task(void *arg);
100 static void vnet_handle_res_err(vio_net_handle_t vrh, vio_net_err_val_t err);
101 
102 /* Exported to vnet_gen */
103 int vnet_mtu_update(vnet_t *vnetp, uint32_t mtu);
104 void vnet_link_update(vnet_t *vnetp, link_state_t link_state);
105 void vnet_dds_cleanup_hio(vnet_t *vnetp);
106 
107 static kstat_t *vnet_hio_setup_kstats(char *ks_mod, char *ks_name,
108     vnet_res_t *vresp);
109 static int vnet_hio_update_kstats(kstat_t *ksp, int rw);
110 static void vnet_hio_get_stats(vnet_res_t *vresp, vnet_hio_stats_t *statsp);
111 static void vnet_hio_destroy_kstats(kstat_t *ksp);
112 
/* Exported to vnet_dds */
114 int vnet_send_dds_msg(vnet_t *vnetp, void *dmsg);
115 
116 /* Externs that are imported from vnet_gen */
117 extern int vgen_init(void *vnetp, uint64_t regprop, dev_info_t *vnetdip,
118     const uint8_t *macaddr, void **vgenhdl);
119 extern void vgen_uninit(void *arg);
120 extern int vgen_dds_tx(void *arg, void *dmsg);
121 extern void vgen_mod_init(void);
122 extern int vgen_mod_cleanup(void);
123 extern void vgen_mod_fini(void);
124 
125 /* Externs that are imported from vnet_dds */
126 extern void vdds_mod_init(void);
127 extern void vdds_mod_fini(void);
128 extern int vdds_init(vnet_t *vnetp);
129 extern void vdds_cleanup(vnet_t *vnetp);
130 extern void vdds_process_dds_msg(vnet_t *vnetp, vio_dds_msg_t *dmsg);
131 extern void vdds_cleanup_hybrid_res(void *arg);
132 extern void vdds_cleanup_hio(vnet_t *vnetp);
133 
134 #define	DRV_NAME	"vnet"
/*
 * Take a reference on a fdb entry: atomically increment its refcnt.
 * Wrapped in do { } while (0) so that the macro behaves as a single
 * statement and can be used safely in an unbraced if/else.
 */
#define	VNET_FDBE_REFHOLD(p)						\
do {									\
	atomic_inc_32(&(p)->refcnt);					\
	ASSERT((p)->refcnt != 0);					\
	_NOTE(CONSTCOND)						\
} while (0)
140 
/*
 * Drop a reference on a fdb entry: atomically decrement its refcnt.
 * The entry must currently be held (refcnt != 0). Wrapped in
 * do { } while (0) so that the macro behaves as a single statement and
 * can be used safely in an unbraced if/else.
 */
#define	VNET_FDBE_REFRELE(p)						\
do {									\
	ASSERT((p)->refcnt != 0);					\
	atomic_dec_32(&(p)->refcnt);					\
	_NOTE(CONSTCOND)						\
} while (0)
146 
/*
 * Callback flags handed to the MAC layer. MC_IOCTL is only set when the
 * driver is built with VNET_IOC_DEBUG; otherwise no optional callback flag
 * is set.
 */
#ifdef	VNET_IOC_DEBUG
#define	VNET_M_CALLBACK_FLAGS	(MC_IOCTL)
#else
#define	VNET_M_CALLBACK_FLAGS	(0)
#endif

/*
 * MAC callback vector for the vnet device; installed into the
 * mac_register_t by vnet_mac_register().
 */
static mac_callbacks_t vnet_m_callbacks = {
	VNET_M_CALLBACK_FLAGS,	/* optional callback flags (see above) */
	vnet_m_stat,		/* get statistics */
	vnet_m_start,		/* enable transmit/receive */
	vnet_m_stop,		/* stop transmit/receive */
	vnet_m_promisc,		/* set/clear promiscuous mode */
	vnet_m_multicst,	/* enable/disable a multicast address */
	vnet_m_unicst,		/* set unicast address */
	vnet_m_tx,		/* transmit a chain of packets */
	vnet_m_ioctl,		/* ioctl handler */
	NULL,			/* remaining entries are not provided */
	NULL
};
166 
167 /*
168  * Linked list of "vnet_t" structures - one per instance.
169  */
170 static vnet_t	*vnet_headp = NULL;
171 static krwlock_t vnet_rw;
172 
173 /* Tunables */
174 uint32_t vnet_ntxds = VNET_NTXDS;	/* power of 2 transmit descriptors */
175 uint32_t vnet_ldcwd_interval = VNET_LDCWD_INTERVAL; /* watchdog freq in msec */
176 uint32_t vnet_ldcwd_txtimeout = VNET_LDCWD_TXTIMEOUT;  /* tx timeout in msec */
177 uint32_t vnet_ldc_mtu = VNET_LDC_MTU;		/* ldc mtu */
178 
179 /*
180  * Set this to non-zero to enable additional internal receive buffer pools
181  * based on the MTU of the device for better performance at the cost of more
182  * memory consumption. This is turned off by default, to use allocb(9F) for
183  * receive buffer allocations of sizes > 2K.
184  */
185 boolean_t vnet_jumbo_rxpools = B_FALSE;
186 
187 /* # of chains in fdb hash table */
188 uint32_t	vnet_fdb_nchains = VNET_NFDB_HASH;
189 
190 /* Internal tunables */
191 uint32_t	vnet_ethermtu = 1500;	/* mtu of the device */
192 
193 /*
194  * Default vlan id. This is only used internally when the "default-vlan-id"
195  * property is not present in the MD device node. Therefore, this should not be
196  * used as a tunable; if this value is changed, the corresponding variable
197  * should be updated to the same value in vsw and also other vnets connected to
198  * the same vsw.
199  */
200 uint16_t	vnet_default_vlan_id = 1;
201 
202 /* delay in usec to wait for all references on a fdb entry to be dropped */
203 uint32_t vnet_fdbe_refcnt_delay = 10;
204 
205 static struct ether_addr etherbroadcastaddr = {
206 	0xff, 0xff, 0xff, 0xff, 0xff, 0xff
207 };
208 
209 
210 /*
211  * Property names
212  */
213 static char macaddr_propname[] = "local-mac-address";
214 
215 /*
216  * This is the string displayed by modinfo(1m).
217  */
218 static char vnet_ident[] = "vnet driver";
219 extern struct mod_ops mod_driverops;
/*
 * Character/block entry points. Open and close are nulldev, property
 * operations use ddi_prop_op, and every other entry point is nodev (or
 * nochpoll for chpoll).
 */
static struct cb_ops cb_vnetops = {
	nulldev,		/* cb_open */
	nulldev,		/* cb_close */
	nodev,			/* cb_strategy */
	nodev,			/* cb_print */
	nodev,			/* cb_dump */
	nodev,			/* cb_read */
	nodev,			/* cb_write */
	nodev,			/* cb_ioctl */
	nodev,			/* cb_devmap */
	nodev,			/* cb_mmap */
	nodev,			/* cb_segmap */
	nochpoll,		/* cb_chpoll */
	ddi_prop_op,		/* cb_prop_op */
	NULL,			/* cb_stream */
	(int)(D_MP)		/* cb_flag */
};
237 
/*
 * Device operations vector. Attach and detach are implemented by
 * vnetattach()/vnetdetach(); quiesce is explicitly not supported.
 * This same structure is passed to mac_init_ops()/mac_fini_ops() in
 * _init()/_fini().
 * NOTE(review): vnetdevinfo() is prototyped in this file but devo_getinfo
 * is NULL here - confirm whether that is intentional.
 */
static struct dev_ops vnetops = {
	DEVO_REV,		/* devo_rev */
	0,			/* devo_refcnt */
	NULL,			/* devo_getinfo */
	nulldev,		/* devo_identify */
	nulldev,		/* devo_probe */
	vnetattach,		/* devo_attach */
	vnetdetach,		/* devo_detach */
	nodev,			/* devo_reset */
	&cb_vnetops,		/* devo_cb_ops */
	(struct bus_ops *)NULL,	/* devo_bus_ops */
	NULL,			/* devo_power */
	ddi_quiesce_not_supported,	/* devo_quiesce */
};
252 
/* Module linkage information for this driver: ops, ident string, dev_ops. */
static struct modldrv modldrv = {
	&mod_driverops,		/* Type of module.  This one is a driver */
	vnet_ident,		/* ID string */
	&vnetops		/* driver specific ops */
};
258 
/*
 * Module linkage passed to mod_install(), mod_remove() and mod_info();
 * it references the single modldrv above and is NULL-terminated.
 */
static struct modlinkage modlinkage = {
	MODREV_1, (void *)&modldrv, NULL
};
262 
263 #ifdef DEBUG
264 
265 /*
266  * Print debug messages - set to 0xf to enable all msgs
267  */
268 int vnet_dbglevel = 0x8;
269 
270 static void
271 debug_printf(const char *fname, void *arg, const char *fmt, ...)
272 {
273 	char    buf[512];
274 	va_list ap;
275 	vnet_t *vnetp = (vnet_t *)arg;
276 	char    *bufp = buf;
277 
278 	if (vnetp == NULL) {
279 		(void) sprintf(bufp, "%s: ", fname);
280 		bufp += strlen(bufp);
281 	} else {
282 		(void) sprintf(bufp, "vnet%d:%s: ", vnetp->instance, fname);
283 		bufp += strlen(bufp);
284 	}
285 	va_start(ap, fmt);
286 	(void) vsprintf(bufp, fmt, ap);
287 	va_end(ap);
288 	cmn_err(CE_CONT, "%s\n", buf);
289 }
290 
291 #endif
292 
293 /* _init(9E): initialize the loadable module */
294 int
295 _init(void)
296 {
297 	int status;
298 
299 	DBG1(NULL, "enter\n");
300 
301 	mac_init_ops(&vnetops, "vnet");
302 	status = mod_install(&modlinkage);
303 	if (status != 0) {
304 		mac_fini_ops(&vnetops);
305 	}
306 	vdds_mod_init();
307 	vgen_mod_init();
308 	DBG1(NULL, "exit(%d)\n", status);
309 	return (status);
310 }
311 
312 /* _fini(9E): prepare the module for unloading. */
313 int
314 _fini(void)
315 {
316 	int		status;
317 
318 	DBG1(NULL, "enter\n");
319 
320 	status = vgen_mod_cleanup();
321 	if (status != 0)
322 		return (status);
323 
324 	status = mod_remove(&modlinkage);
325 	if (status != 0)
326 		return (status);
327 	mac_fini_ops(&vnetops);
328 	vgen_mod_fini();
329 	vdds_mod_fini();
330 
331 	DBG1(NULL, "exit(%d)\n", status);
332 	return (status);
333 }
334 
335 /* _info(9E): return information about the loadable module */
336 int
337 _info(struct modinfo *modinfop)
338 {
339 	return (mod_info(&modlinkage, modinfop));
340 }
341 
342 /*
343  * attach(9E): attach a device to the system.
344  * called once for each instance of the device on the system.
345  */
346 static int
347 vnetattach(dev_info_t *dip, ddi_attach_cmd_t cmd)
348 {
349 	vnet_t			*vnetp;
350 	int			status;
351 	int			instance;
352 	uint64_t		reg;
353 	char			qname[TASKQ_NAMELEN];
354 	vnet_attach_progress_t	attach_progress;
355 
356 	attach_progress = AST_init;
357 
358 	switch (cmd) {
359 	case DDI_ATTACH:
360 		break;
361 	case DDI_RESUME:
362 	case DDI_PM_RESUME:
363 	default:
364 		goto vnet_attach_fail;
365 	}
366 
367 	instance = ddi_get_instance(dip);
368 	DBG1(NULL, "instance(%d) enter\n", instance);
369 
370 	/* allocate vnet_t and mac_t structures */
371 	vnetp = kmem_zalloc(sizeof (vnet_t), KM_SLEEP);
372 	vnetp->dip = dip;
373 	vnetp->instance = instance;
374 	rw_init(&vnetp->vrwlock, NULL, RW_DRIVER, NULL);
375 	rw_init(&vnetp->vsw_fp_rw, NULL, RW_DRIVER, NULL);
376 	attach_progress |= AST_vnet_alloc;
377 
378 	status = vdds_init(vnetp);
379 	if (status != 0) {
380 		goto vnet_attach_fail;
381 	}
382 	attach_progress |= AST_vdds_init;
383 
384 	/* setup links to vnet_t from both devinfo and mac_t */
385 	ddi_set_driver_private(dip, (caddr_t)vnetp);
386 
387 	/* read the mac address */
388 	status = vnet_read_mac_address(vnetp);
389 	if (status != DDI_SUCCESS) {
390 		goto vnet_attach_fail;
391 	}
392 	attach_progress |= AST_read_macaddr;
393 
394 	reg = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
395 	    DDI_PROP_DONTPASS, "reg", -1);
396 	if (reg == -1) {
397 		goto vnet_attach_fail;
398 	}
399 	vnetp->reg = reg;
400 
401 	vnet_fdb_create(vnetp);
402 	attach_progress |= AST_fdbh_alloc;
403 
404 	(void) snprintf(qname, TASKQ_NAMELEN, "vnet_taskq%d", instance);
405 	if ((vnetp->taskqp = ddi_taskq_create(dip, qname, 1,
406 	    TASKQ_DEFAULTPRI, 0)) == NULL) {
407 		cmn_err(CE_WARN, "!vnet%d: Unable to create task queue",
408 		    instance);
409 		goto vnet_attach_fail;
410 	}
411 	attach_progress |= AST_taskq_create;
412 
413 	/* add to the list of vnet devices */
414 	WRITE_ENTER(&vnet_rw);
415 	vnetp->nextp = vnet_headp;
416 	vnet_headp = vnetp;
417 	RW_EXIT(&vnet_rw);
418 
419 	attach_progress |= AST_vnet_list;
420 
421 	/*
422 	 * Initialize the generic vnet plugin which provides
423 	 * communication via sun4v LDC (logical domain channel) based
424 	 * resources. It will register the LDC resources as and when
425 	 * they become available.
426 	 */
427 	status = vgen_init(vnetp, reg, vnetp->dip,
428 	    (uint8_t *)vnetp->curr_macaddr, &vnetp->vgenhdl);
429 	if (status != DDI_SUCCESS) {
430 		DERR(vnetp, "vgen_init() failed\n");
431 		goto vnet_attach_fail;
432 	}
433 	attach_progress |= AST_vgen_init;
434 
435 	/* register with MAC layer */
436 	status = vnet_mac_register(vnetp);
437 	if (status != DDI_SUCCESS) {
438 		goto vnet_attach_fail;
439 	}
440 	vnetp->link_state = LINK_STATE_UNKNOWN;
441 
442 	attach_progress |= AST_macreg;
443 
444 	vnetp->attach_progress = attach_progress;
445 
446 	DBG1(NULL, "instance(%d) exit\n", instance);
447 	return (DDI_SUCCESS);
448 
449 vnet_attach_fail:
450 	vnetp->attach_progress = attach_progress;
451 	status = vnet_unattach(vnetp);
452 	ASSERT(status == 0);
453 	return (DDI_FAILURE);
454 }
455 
456 /*
457  * detach(9E): detach a device from the system.
458  */
459 static int
460 vnetdetach(dev_info_t *dip, ddi_detach_cmd_t cmd)
461 {
462 	vnet_t		*vnetp;
463 	int		instance;
464 
465 	instance = ddi_get_instance(dip);
466 	DBG1(NULL, "instance(%d) enter\n", instance);
467 
468 	vnetp = ddi_get_driver_private(dip);
469 	if (vnetp == NULL) {
470 		goto vnet_detach_fail;
471 	}
472 
473 	switch (cmd) {
474 	case DDI_DETACH:
475 		break;
476 	case DDI_SUSPEND:
477 	case DDI_PM_SUSPEND:
478 	default:
479 		goto vnet_detach_fail;
480 	}
481 
482 	if (vnet_unattach(vnetp) != 0) {
483 		goto vnet_detach_fail;
484 	}
485 
486 	return (DDI_SUCCESS);
487 
488 vnet_detach_fail:
489 	return (DDI_FAILURE);
490 }
491 
492 /*
493  * Common routine to handle vnetattach() failure and vnetdetach(). Note that
494  * the only reason this function could fail is if mac_unregister() fails.
495  * Otherwise, this function must ensure that all resources are freed and return
496  * success.
497  */
498 static int
499 vnet_unattach(vnet_t *vnetp)
500 {
501 	vnet_attach_progress_t	attach_progress;
502 
503 	attach_progress = vnetp->attach_progress;
504 
505 	/*
506 	 * Unregister from the gldv3 subsystem. This can fail, in particular
507 	 * if there are still any open references to this mac device; in which
508 	 * case we just return failure without continuing to detach further.
509 	 */
510 	if (attach_progress & AST_macreg) {
511 		if (mac_unregister(vnetp->mh) != 0) {
512 			return (1);
513 		}
514 		attach_progress &= ~AST_macreg;
515 	}
516 
517 	/*
518 	 * Now that we have unregistered from gldv3, we must finish all other
519 	 * steps and successfully return from this function; otherwise we will
520 	 * end up leaving the device in a broken/unusable state.
521 	 *
522 	 * First, release any hybrid resources assigned to this vnet device.
523 	 */
524 	if (attach_progress & AST_vdds_init) {
525 		vdds_cleanup(vnetp);
526 		attach_progress &= ~AST_vdds_init;
527 	}
528 
529 	/*
530 	 * Uninit vgen. This stops further mdeg callbacks to this vnet
531 	 * device and/or its ports; and detaches any existing ports.
532 	 */
533 	if (attach_progress & AST_vgen_init) {
534 		vgen_uninit(vnetp->vgenhdl);
535 		attach_progress &= ~AST_vgen_init;
536 	}
537 
538 	/* Destroy the taskq. */
539 	if (attach_progress & AST_taskq_create) {
540 		ddi_taskq_destroy(vnetp->taskqp);
541 		attach_progress &= ~AST_taskq_create;
542 	}
543 
544 	/* Destroy fdb. */
545 	if (attach_progress & AST_fdbh_alloc) {
546 		vnet_fdb_destroy(vnetp);
547 		attach_progress &= ~AST_fdbh_alloc;
548 	}
549 
550 	/* Remove from the device list */
551 	if (attach_progress & AST_vnet_list) {
552 		vnet_t		**vnetpp;
553 		/* unlink from instance(vnet_t) list */
554 		WRITE_ENTER(&vnet_rw);
555 		for (vnetpp = &vnet_headp; *vnetpp;
556 		    vnetpp = &(*vnetpp)->nextp) {
557 			if (*vnetpp == vnetp) {
558 				*vnetpp = vnetp->nextp;
559 				break;
560 			}
561 		}
562 		RW_EXIT(&vnet_rw);
563 		attach_progress &= ~AST_vnet_list;
564 	}
565 
566 	if (attach_progress & AST_vnet_alloc) {
567 		rw_destroy(&vnetp->vrwlock);
568 		rw_destroy(&vnetp->vsw_fp_rw);
569 		attach_progress &= ~AST_vnet_list;
570 		KMEM_FREE(vnetp);
571 	}
572 
573 	return (0);
574 }
575 
576 /* enable the device for transmit/receive */
577 static int
578 vnet_m_start(void *arg)
579 {
580 	vnet_t		*vnetp = arg;
581 
582 	DBG1(vnetp, "enter\n");
583 
584 	WRITE_ENTER(&vnetp->vrwlock);
585 	vnetp->flags |= VNET_STARTED;
586 	vnet_start_resources(vnetp);
587 	RW_EXIT(&vnetp->vrwlock);
588 
589 	DBG1(vnetp, "exit\n");
590 	return (VNET_SUCCESS);
591 
592 }
593 
594 /* stop transmit/receive for the device */
595 static void
596 vnet_m_stop(void *arg)
597 {
598 	vnet_t		*vnetp = arg;
599 
600 	DBG1(vnetp, "enter\n");
601 
602 	WRITE_ENTER(&vnetp->vrwlock);
603 	if (vnetp->flags & VNET_STARTED) {
604 		/*
605 		 * Set the flags appropriately; this should prevent starting of
606 		 * any new resources that are added(see vnet_res_start_task()),
607 		 * while we release the vrwlock in vnet_stop_resources() before
608 		 * stopping each resource.
609 		 */
610 		vnetp->flags &= ~VNET_STARTED;
611 		vnetp->flags |= VNET_STOPPING;
612 		vnet_stop_resources(vnetp);
613 		vnetp->flags &= ~VNET_STOPPING;
614 	}
615 	RW_EXIT(&vnetp->vrwlock);
616 
617 	DBG1(vnetp, "exit\n");
618 }
619 
620 /* set the unicast mac address of the device */
621 static int
622 vnet_m_unicst(void *arg, const uint8_t *macaddr)
623 {
624 	_NOTE(ARGUNUSED(macaddr))
625 
626 	vnet_t *vnetp = arg;
627 
628 	DBG1(vnetp, "enter\n");
629 	/*
630 	 * NOTE: setting mac address dynamically is not supported.
631 	 */
632 	DBG1(vnetp, "exit\n");
633 
634 	return (VNET_FAILURE);
635 }
636 
637 /* enable/disable a multicast address */
638 static int
639 vnet_m_multicst(void *arg, boolean_t add, const uint8_t *mca)
640 {
641 	_NOTE(ARGUNUSED(add, mca))
642 
643 	vnet_t *vnetp = arg;
644 	vnet_res_t	*vresp;
645 	mac_register_t	*macp;
646 	mac_callbacks_t	*cbp;
647 	int rv = VNET_SUCCESS;
648 
649 	DBG1(vnetp, "enter\n");
650 
651 	READ_ENTER(&vnetp->vrwlock);
652 	for (vresp = vnetp->vres_list; vresp != NULL; vresp = vresp->nextp) {
653 		if (vresp->type == VIO_NET_RES_LDC_SERVICE) {
654 			macp = &vresp->macreg;
655 			cbp = macp->m_callbacks;
656 			rv = cbp->mc_multicst(macp->m_driver, add, mca);
657 		}
658 	}
659 	RW_EXIT(&vnetp->vrwlock);
660 
661 	DBG1(vnetp, "exit(%d)\n", rv);
662 	return (rv);
663 }
664 
665 /* set or clear promiscuous mode on the device */
666 static int
667 vnet_m_promisc(void *arg, boolean_t on)
668 {
669 	_NOTE(ARGUNUSED(on))
670 
671 	vnet_t *vnetp = arg;
672 	DBG1(vnetp, "enter\n");
673 	/*
674 	 * NOTE: setting promiscuous mode is not supported, just return success.
675 	 */
676 	DBG1(vnetp, "exit\n");
677 	return (VNET_SUCCESS);
678 }
679 
680 /*
681  * Transmit a chain of packets. This function provides switching functionality
682  * based on the destination mac address to reach other guests (within ldoms) or
683  * external hosts.
684  */
685 mblk_t *
686 vnet_m_tx(void *arg, mblk_t *mp)
687 {
688 	vnet_t			*vnetp;
689 	vnet_res_t		*vresp;
690 	mblk_t			*next;
691 	mblk_t			*resid_mp;
692 	mac_register_t		*macp;
693 	struct ether_header	*ehp;
694 	boolean_t		is_unicast;
695 	boolean_t		is_pvid;	/* non-default pvid ? */
696 	boolean_t		hres;		/* Hybrid resource ? */
697 
698 	vnetp = (vnet_t *)arg;
699 	DBG1(vnetp, "enter\n");
700 	ASSERT(mp != NULL);
701 
702 	is_pvid = (vnetp->pvid != vnetp->default_vlan_id) ? B_TRUE : B_FALSE;
703 
704 	while (mp != NULL) {
705 
706 		next = mp->b_next;
707 		mp->b_next = NULL;
708 
709 		/*
710 		 * Find fdb entry for the destination
711 		 * and hold a reference to it.
712 		 */
713 		ehp = (struct ether_header *)mp->b_rptr;
714 		vresp = vnet_fdbe_find(vnetp, &ehp->ether_dhost);
715 		if (vresp != NULL) {
716 
717 			/*
718 			 * Destination found in FDB.
719 			 * The destination is a vnet device within ldoms
720 			 * and directly reachable, invoke the tx function
721 			 * in the fdb entry.
722 			 */
723 			macp = &vresp->macreg;
724 			resid_mp = macp->m_callbacks->mc_tx(macp->m_driver, mp);
725 
726 			/* tx done; now release ref on fdb entry */
727 			VNET_FDBE_REFRELE(vresp);
728 
729 			if (resid_mp != NULL) {
730 				/* m_tx failed */
731 				mp->b_next = next;
732 				break;
733 			}
734 		} else {
735 			is_unicast = !(IS_BROADCAST(ehp) ||
736 			    (IS_MULTICAST(ehp)));
737 			/*
738 			 * Destination is not in FDB.
739 			 * If the destination is broadcast or multicast,
740 			 * then forward the packet to vswitch.
741 			 * If a Hybrid resource avilable, then send the
742 			 * unicast packet via hybrid resource, otherwise
743 			 * forward it to vswitch.
744 			 */
745 			READ_ENTER(&vnetp->vsw_fp_rw);
746 
747 			if ((is_unicast) && (vnetp->hio_fp != NULL)) {
748 				vresp = vnetp->hio_fp;
749 				hres = B_TRUE;
750 			} else {
751 				vresp = vnetp->vsw_fp;
752 				hres = B_FALSE;
753 			}
754 			if (vresp == NULL) {
755 				/*
756 				 * no fdb entry to vsw? drop the packet.
757 				 */
758 				RW_EXIT(&vnetp->vsw_fp_rw);
759 				freemsg(mp);
760 				mp = next;
761 				continue;
762 			}
763 
764 			/* ref hold the fdb entry to vsw */
765 			VNET_FDBE_REFHOLD(vresp);
766 
767 			RW_EXIT(&vnetp->vsw_fp_rw);
768 
769 			/*
770 			 * In the case of a hybrid resource we need to insert
771 			 * the tag for the pvid case here; unlike packets that
772 			 * are destined to a vnet/vsw in which case the vgen
773 			 * layer does the tagging before sending it over ldc.
774 			 */
775 			if (hres == B_TRUE) {
776 				/*
777 				 * Determine if the frame being transmitted
778 				 * over the hybrid resource is untagged. If so,
779 				 * insert the tag before transmitting.
780 				 */
781 				if (is_pvid == B_TRUE &&
782 				    ehp->ether_type != htons(ETHERTYPE_VLAN)) {
783 
784 					mp = vnet_vlan_insert_tag(mp,
785 					    vnetp->pvid);
786 					if (mp == NULL) {
787 						VNET_FDBE_REFRELE(vresp);
788 						mp = next;
789 						continue;
790 					}
791 
792 				}
793 			}
794 
795 			macp = &vresp->macreg;
796 			resid_mp = macp->m_callbacks->mc_tx(macp->m_driver, mp);
797 
798 			/* tx done; now release ref on fdb entry */
799 			VNET_FDBE_REFRELE(vresp);
800 
801 			if (resid_mp != NULL) {
802 				/* m_tx failed */
803 				mp->b_next = next;
804 				break;
805 			}
806 		}
807 
808 		mp = next;
809 	}
810 
811 	DBG1(vnetp, "exit\n");
812 	return (mp);
813 }
814 
815 /* get statistics from the device */
816 int
817 vnet_m_stat(void *arg, uint_t stat, uint64_t *val)
818 {
819 	vnet_t *vnetp = arg;
820 	vnet_res_t	*vresp;
821 	mac_register_t	*macp;
822 	mac_callbacks_t	*cbp;
823 	uint64_t val_total = 0;
824 
825 	DBG1(vnetp, "enter\n");
826 
827 	/*
828 	 * get the specified statistic from each transport and return the
829 	 * aggregate val.  This obviously only works for counters.
830 	 */
831 	if ((IS_MAC_STAT(stat) && !MAC_STAT_ISACOUNTER(stat)) ||
832 	    (IS_MACTYPE_STAT(stat) && !ETHER_STAT_ISACOUNTER(stat))) {
833 		return (ENOTSUP);
834 	}
835 
836 	READ_ENTER(&vnetp->vrwlock);
837 	for (vresp = vnetp->vres_list; vresp != NULL; vresp = vresp->nextp) {
838 		macp = &vresp->macreg;
839 		cbp = macp->m_callbacks;
840 		if (cbp->mc_getstat(macp->m_driver, stat, val) == 0)
841 			val_total += *val;
842 	}
843 	RW_EXIT(&vnetp->vrwlock);
844 
845 	*val = val_total;
846 
847 	DBG1(vnetp, "exit\n");
848 	return (0);
849 }
850 
851 /* wrapper function for mac_register() */
852 static int
853 vnet_mac_register(vnet_t *vnetp)
854 {
855 	mac_register_t	*macp;
856 	int		err;
857 
858 	if ((macp = mac_alloc(MAC_VERSION)) == NULL)
859 		return (DDI_FAILURE);
860 	macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
861 	macp->m_driver = vnetp;
862 	macp->m_dip = vnetp->dip;
863 	macp->m_src_addr = vnetp->curr_macaddr;
864 	macp->m_callbacks = &vnet_m_callbacks;
865 	macp->m_min_sdu = 0;
866 	macp->m_max_sdu = vnetp->mtu;
867 	macp->m_margin = VLAN_TAGSZ;
868 
869 	/*
870 	 * Finally, we're ready to register ourselves with the MAC layer
871 	 * interface; if this succeeds, we're all ready to start()
872 	 */
873 	err = mac_register(macp, &vnetp->mh);
874 	mac_free(macp);
875 	return (err == 0 ? DDI_SUCCESS : DDI_FAILURE);
876 }
877 
878 /* read the mac address of the device */
879 static int
880 vnet_read_mac_address(vnet_t *vnetp)
881 {
882 	uchar_t 	*macaddr;
883 	uint32_t 	size;
884 	int 		rv;
885 
886 	rv = ddi_prop_lookup_byte_array(DDI_DEV_T_ANY, vnetp->dip,
887 	    DDI_PROP_DONTPASS, macaddr_propname, &macaddr, &size);
888 	if ((rv != DDI_PROP_SUCCESS) || (size != ETHERADDRL)) {
889 		DWARN(vnetp, "prop_lookup failed(%s) err(%d)\n",
890 		    macaddr_propname, rv);
891 		return (DDI_FAILURE);
892 	}
893 	bcopy(macaddr, (caddr_t)vnetp->vendor_addr, ETHERADDRL);
894 	bcopy(macaddr, (caddr_t)vnetp->curr_macaddr, ETHERADDRL);
895 	ddi_prop_free(macaddr);
896 
897 	return (DDI_SUCCESS);
898 }
899 
900 static void
901 vnet_fdb_create(vnet_t *vnetp)
902 {
903 	char		hashname[MAXNAMELEN];
904 
905 	(void) snprintf(hashname, MAXNAMELEN, "vnet%d-fdbhash",
906 	    vnetp->instance);
907 	vnetp->fdb_nchains = vnet_fdb_nchains;
908 	vnetp->fdb_hashp = mod_hash_create_ptrhash(hashname, vnetp->fdb_nchains,
909 	    mod_hash_null_valdtor, sizeof (void *));
910 }
911 
912 static void
913 vnet_fdb_destroy(vnet_t *vnetp)
914 {
915 	/* destroy fdb-hash-table */
916 	if (vnetp->fdb_hashp != NULL) {
917 		mod_hash_destroy_hash(vnetp->fdb_hashp);
918 		vnetp->fdb_hashp = NULL;
919 		vnetp->fdb_nchains = 0;
920 	}
921 }
922 
923 /*
924  * Add an entry into the fdb.
925  */
926 void
927 vnet_fdbe_add(vnet_t *vnetp, vnet_res_t *vresp)
928 {
929 	uint64_t	addr = 0;
930 	int		rv;
931 
932 	KEY_HASH(addr, vresp->rem_macaddr);
933 
934 	/*
935 	 * If the entry being added corresponds to LDC_SERVICE resource,
936 	 * that is, vswitch connection, it is added to the hash and also
937 	 * the entry is cached, an additional reference count reflects
938 	 * this. The HYBRID resource is not added to the hash, but only
939 	 * cached, as it is only used for sending out packets for unknown
940 	 * unicast destinations.
941 	 */
942 	(vresp->type == VIO_NET_RES_LDC_SERVICE) ?
943 	    (vresp->refcnt = 1) : (vresp->refcnt = 0);
944 
945 	/*
946 	 * Note: duplicate keys will be rejected by mod_hash.
947 	 */
948 	if (vresp->type != VIO_NET_RES_HYBRID) {
949 		rv = mod_hash_insert(vnetp->fdb_hashp, (mod_hash_key_t)addr,
950 		    (mod_hash_val_t)vresp);
951 		if (rv != 0) {
952 			DWARN(vnetp, "Duplicate macaddr key(%lx)\n", addr);
953 			return;
954 		}
955 	}
956 
957 	if (vresp->type == VIO_NET_RES_LDC_SERVICE) {
958 		/* Cache the fdb entry to vsw-port */
959 		WRITE_ENTER(&vnetp->vsw_fp_rw);
960 		if (vnetp->vsw_fp == NULL)
961 			vnetp->vsw_fp = vresp;
962 		RW_EXIT(&vnetp->vsw_fp_rw);
963 	} else if (vresp->type == VIO_NET_RES_HYBRID) {
964 		/* Cache the fdb entry to hybrid resource */
965 		WRITE_ENTER(&vnetp->vsw_fp_rw);
966 		if (vnetp->hio_fp == NULL)
967 			vnetp->hio_fp = vresp;
968 		RW_EXIT(&vnetp->vsw_fp_rw);
969 	}
970 }
971 
972 /*
973  * Remove an entry from fdb.
974  */
975 static void
976 vnet_fdbe_del(vnet_t *vnetp, vnet_res_t *vresp)
977 {
978 	uint64_t	addr = 0;
979 	int		rv;
980 	uint32_t	refcnt;
981 	vnet_res_t	*tmp;
982 
983 	KEY_HASH(addr, vresp->rem_macaddr);
984 
985 	/*
986 	 * Remove the entry from fdb hash table.
987 	 * This prevents further references to this fdb entry.
988 	 */
989 	if (vresp->type != VIO_NET_RES_HYBRID) {
990 		rv = mod_hash_remove(vnetp->fdb_hashp, (mod_hash_key_t)addr,
991 		    (mod_hash_val_t *)&tmp);
992 		if (rv != 0) {
993 			/*
994 			 * As the resources are added to the hash only
995 			 * after they are started, this can occur if
996 			 * a resource unregisters before it is ever started.
997 			 */
998 			return;
999 		}
1000 	}
1001 
1002 	if (vresp->type == VIO_NET_RES_LDC_SERVICE) {
1003 		WRITE_ENTER(&vnetp->vsw_fp_rw);
1004 
1005 		ASSERT(tmp == vnetp->vsw_fp);
1006 		vnetp->vsw_fp = NULL;
1007 
1008 		RW_EXIT(&vnetp->vsw_fp_rw);
1009 	} else if (vresp->type == VIO_NET_RES_HYBRID) {
1010 		WRITE_ENTER(&vnetp->vsw_fp_rw);
1011 
1012 		vnetp->hio_fp = NULL;
1013 
1014 		RW_EXIT(&vnetp->vsw_fp_rw);
1015 	}
1016 
1017 	/*
1018 	 * If there are threads already ref holding before the entry was
1019 	 * removed from hash table, then wait for ref count to drop to zero.
1020 	 */
1021 	(vresp->type == VIO_NET_RES_LDC_SERVICE) ?
1022 	    (refcnt = 1) : (refcnt = 0);
1023 	while (vresp->refcnt > refcnt) {
1024 		delay(drv_usectohz(vnet_fdbe_refcnt_delay));
1025 	}
1026 }
1027 
1028 /*
1029  * Search fdb for a given mac address. If an entry is found, hold
1030  * a reference to it and return the entry; else returns NULL.
1031  */
1032 static vnet_res_t *
1033 vnet_fdbe_find(vnet_t *vnetp, struct ether_addr *addrp)
1034 {
1035 	uint64_t	key = 0;
1036 	vnet_res_t	*vresp;
1037 	int		rv;
1038 
1039 	KEY_HASH(key, addrp->ether_addr_octet);
1040 
1041 	rv = mod_hash_find_cb(vnetp->fdb_hashp, (mod_hash_key_t)key,
1042 	    (mod_hash_val_t *)&vresp, vnet_fdbe_find_cb);
1043 
1044 	if (rv != 0)
1045 		return (NULL);
1046 
1047 	return (vresp);
1048 }
1049 
1050 /*
1051  * Callback function provided to mod_hash_find_cb(). After finding the fdb
1052  * entry corresponding to the key (macaddr), this callback will be invoked by
1053  * mod_hash_find_cb() to atomically increment the reference count on the fdb
1054  * entry before returning the found entry.
1055  */
1056 static void
1057 vnet_fdbe_find_cb(mod_hash_key_t key, mod_hash_val_t val)
1058 {
1059 	_NOTE(ARGUNUSED(key))
1060 	VNET_FDBE_REFHOLD((vnet_res_t *)val);
1061 }
1062 
1063 /*
1064  * Frames received that are tagged with the pvid of the vnet device must be
1065  * untagged before sending up the stack. This function walks the chain of rx
1066  * frames, untags any such frames and returns the updated chain.
1067  *
1068  * Arguments:
1069  *    pvid:  pvid of the vnet device for which packets are being received
1070  *    mp:    head of pkt chain to be validated and untagged
1071  *
1072  * Returns:
1073  *    mp:    head of updated chain of packets
1074  */
1075 static void
1076 vnet_rx_frames_untag(uint16_t pvid, mblk_t **mp)
1077 {
1078 	struct ether_vlan_header	*evhp;
1079 	mblk_t				*bp;
1080 	mblk_t				*bpt;
1081 	mblk_t				*bph;
1082 	mblk_t				*bpn;
1083 
1084 	bpn = bph = bpt = NULL;
1085 
1086 	for (bp = *mp; bp != NULL; bp = bpn) {
1087 
1088 		bpn = bp->b_next;
1089 		bp->b_next = bp->b_prev = NULL;
1090 
1091 		evhp = (struct ether_vlan_header *)bp->b_rptr;
1092 
1093 		if (ntohs(evhp->ether_tpid) == ETHERTYPE_VLAN &&
1094 		    VLAN_ID(ntohs(evhp->ether_tci)) == pvid) {
1095 
1096 			bp = vnet_vlan_remove_tag(bp);
1097 			if (bp == NULL) {
1098 				continue;
1099 			}
1100 
1101 		}
1102 
1103 		/* build a chain of processed packets */
1104 		if (bph == NULL) {
1105 			bph = bpt = bp;
1106 		} else {
1107 			bpt->b_next = bp;
1108 			bpt = bp;
1109 		}
1110 
1111 	}
1112 
1113 	*mp = bph;
1114 }
1115 
1116 static void
1117 vnet_rx(vio_net_handle_t vrh, mblk_t *mp)
1118 {
1119 	vnet_res_t	*vresp = (vnet_res_t *)vrh;
1120 	vnet_t		*vnetp = vresp->vnetp;
1121 
1122 	if ((vnetp == NULL) || (vnetp->mh == 0)) {
1123 		freemsgchain(mp);
1124 		return;
1125 	}
1126 
1127 	/*
1128 	 * Packets received over a hybrid resource need additional processing
1129 	 * to remove the tag, for the pvid case. The underlying resource is
1130 	 * not aware of the vnet's pvid and thus packets are received with the
1131 	 * vlan tag in the header; unlike packets that are received over a ldc
1132 	 * channel in which case the peer vnet/vsw would have already removed
1133 	 * the tag.
1134 	 */
1135 	if (vresp->type == VIO_NET_RES_HYBRID &&
1136 	    vnetp->pvid != vnetp->default_vlan_id) {
1137 
1138 		vnet_rx_frames_untag(vnetp->pvid, &mp);
1139 		if (mp == NULL) {
1140 			return;
1141 		}
1142 	}
1143 
1144 	mac_rx(vnetp->mh, NULL, mp);
1145 }
1146 
1147 void
1148 vnet_tx_update(vio_net_handle_t vrh)
1149 {
1150 	vnet_res_t *vresp = (vnet_res_t *)vrh;
1151 	vnet_t *vnetp = vresp->vnetp;
1152 
1153 	if ((vnetp != NULL) && (vnetp->mh != NULL)) {
1154 		mac_tx_update(vnetp->mh);
1155 	}
1156 }
1157 
1158 /*
1159  * Update the new mtu of vnet into the mac layer. First check if the device has
1160  * been plumbed and if so fail the mtu update. Returns 0 on success.
1161  */
1162 int
1163 vnet_mtu_update(vnet_t *vnetp, uint32_t mtu)
1164 {
1165 	int	rv;
1166 
1167 	if (vnetp == NULL || vnetp->mh == NULL) {
1168 		return (EINVAL);
1169 	}
1170 
1171 	WRITE_ENTER(&vnetp->vrwlock);
1172 
1173 	if (vnetp->flags & VNET_STARTED) {
1174 		RW_EXIT(&vnetp->vrwlock);
1175 		cmn_err(CE_NOTE, "!vnet%d: Unable to process mtu "
1176 		    "update as the device is plumbed\n",
1177 		    vnetp->instance);
1178 		return (EBUSY);
1179 	}
1180 
1181 	/* update mtu in the mac layer */
1182 	rv = mac_maxsdu_update(vnetp->mh, mtu);
1183 	if (rv != 0) {
1184 		RW_EXIT(&vnetp->vrwlock);
1185 		cmn_err(CE_NOTE,
1186 		    "!vnet%d: Unable to update mtu with mac layer\n",
1187 		    vnetp->instance);
1188 		return (EIO);
1189 	}
1190 
1191 	vnetp->mtu = mtu;
1192 
1193 	RW_EXIT(&vnetp->vrwlock);
1194 
1195 	return (0);
1196 }
1197 
1198 /*
1199  * Update the link state of vnet to the mac layer.
1200  */
1201 void
1202 vnet_link_update(vnet_t *vnetp, link_state_t link_state)
1203 {
1204 	if (vnetp == NULL || vnetp->mh == NULL) {
1205 		return;
1206 	}
1207 
1208 	WRITE_ENTER(&vnetp->vrwlock);
1209 	if (vnetp->link_state == link_state) {
1210 		RW_EXIT(&vnetp->vrwlock);
1211 		return;
1212 	}
1213 	vnetp->link_state = link_state;
1214 	RW_EXIT(&vnetp->vrwlock);
1215 
1216 	mac_link_update(vnetp->mh, link_state);
1217 }
1218 
1219 /*
1220  * vio_net_resource_reg -- An interface called to register a resource
1221  *	with vnet.
1222  *	macp -- a GLDv3 mac_register that has all the details of
1223  *		a resource and its callbacks etc.
1224  *	type -- resource type.
1225  *	local_macaddr -- resource's MAC address. This is used to
1226  *			 associate a resource with a corresponding vnet.
1227  *	remote_macaddr -- remote side MAC address. This is ignored for
1228  *			  the Hybrid resources.
1229  *	vhp -- A handle returned to the caller.
1230  *	vcb -- A set of callbacks provided to the callers.
1231  */
1232 int vio_net_resource_reg(mac_register_t *macp, vio_net_res_type_t type,
1233     ether_addr_t local_macaddr, ether_addr_t rem_macaddr, vio_net_handle_t *vhp,
1234     vio_net_callbacks_t *vcb)
1235 {
1236 	vnet_t	*vnetp;
1237 	vnet_res_t *vresp;
1238 
1239 	vresp = kmem_zalloc(sizeof (vnet_res_t), KM_SLEEP);
1240 	ether_copy(local_macaddr, vresp->local_macaddr);
1241 	ether_copy(rem_macaddr, vresp->rem_macaddr);
1242 	vresp->type = type;
1243 	bcopy(macp, &vresp->macreg, sizeof (mac_register_t));
1244 
1245 	DBG1(NULL, "Resource Registerig type=0%X\n", type);
1246 
1247 	READ_ENTER(&vnet_rw);
1248 	vnetp = vnet_headp;
1249 	while (vnetp != NULL) {
1250 		if (VNET_MATCH_RES(vresp, vnetp)) {
1251 			vresp->vnetp = vnetp;
1252 
1253 			/* Setup kstats for hio resource */
1254 			if (vresp->type == VIO_NET_RES_HYBRID) {
1255 				vresp->ksp = vnet_hio_setup_kstats(DRV_NAME,
1256 				    "hio", vresp);
1257 				if (vresp->ksp == NULL) {
1258 					cmn_err(CE_NOTE, "!vnet%d: Cannot "
1259 					    "create kstats for hio resource",
1260 					    vnetp->instance);
1261 				}
1262 			}
1263 
1264 			WRITE_ENTER(&vnetp->vrwlock);
1265 			vresp->nextp = vnetp->vres_list;
1266 			vnetp->vres_list = vresp;
1267 			RW_EXIT(&vnetp->vrwlock);
1268 			break;
1269 		}
1270 		vnetp = vnetp->nextp;
1271 	}
1272 	RW_EXIT(&vnet_rw);
1273 	if (vresp->vnetp == NULL) {
1274 		DWARN(NULL, "No vnet instance");
1275 		kmem_free(vresp, sizeof (vnet_res_t));
1276 		return (ENXIO);
1277 	}
1278 
1279 	*vhp = vresp;
1280 	vcb->vio_net_rx_cb = vnet_rx;
1281 	vcb->vio_net_tx_update = vnet_tx_update;
1282 	vcb->vio_net_report_err = vnet_handle_res_err;
1283 
1284 	/* Dispatch a task to start resources */
1285 	vnet_dispatch_res_task(vnetp);
1286 	return (0);
1287 }
1288 
1289 /*
1290  * vio_net_resource_unreg -- An interface to unregister a resource.
1291  */
1292 void
1293 vio_net_resource_unreg(vio_net_handle_t vhp)
1294 {
1295 	vnet_res_t	*vresp = (vnet_res_t *)vhp;
1296 	vnet_t		*vnetp = vresp->vnetp;
1297 	vnet_res_t	*vrp;
1298 	kstat_t		*ksp = NULL;
1299 
1300 	DBG1(NULL, "Resource Registerig hdl=0x%p", vhp);
1301 
1302 	ASSERT(vnetp != NULL);
1303 	/*
1304 	 * Remove the resource from fdb; this ensures
1305 	 * there are no references to the resource.
1306 	 */
1307 	vnet_fdbe_del(vnetp, vresp);
1308 
1309 	/* Now remove the resource from the list */
1310 	WRITE_ENTER(&vnetp->vrwlock);
1311 	if (vresp == vnetp->vres_list) {
1312 		vnetp->vres_list = vresp->nextp;
1313 	} else {
1314 		vrp = vnetp->vres_list;
1315 		while (vrp->nextp != NULL) {
1316 			if (vrp->nextp == vresp) {
1317 				vrp->nextp = vresp->nextp;
1318 				break;
1319 			}
1320 			vrp = vrp->nextp;
1321 		}
1322 	}
1323 
1324 	ksp = vresp->ksp;
1325 	vresp->ksp = NULL;
1326 
1327 	vresp->vnetp = NULL;
1328 	vresp->nextp = NULL;
1329 	RW_EXIT(&vnetp->vrwlock);
1330 	vnet_hio_destroy_kstats(ksp);
1331 	KMEM_FREE(vresp);
1332 }
1333 
1334 /*
1335  * vnet_dds_rx -- an interface called by vgen to DDS messages.
1336  */
1337 void
1338 vnet_dds_rx(void *arg, void *dmsg)
1339 {
1340 	vnet_t *vnetp = arg;
1341 	vdds_process_dds_msg(vnetp, dmsg);
1342 }
1343 
1344 /*
1345  * vnet_send_dds_msg -- An interface provided to DDS to send
1346  *	DDS messages. This simply sends meessages via vgen.
1347  */
1348 int
1349 vnet_send_dds_msg(vnet_t *vnetp, void *dmsg)
1350 {
1351 	int rv;
1352 
1353 	if (vnetp->vgenhdl != NULL) {
1354 		rv = vgen_dds_tx(vnetp->vgenhdl, dmsg);
1355 	}
1356 	return (rv);
1357 }
1358 
1359 /*
1360  * vnet_cleanup_hio -- an interface called by vgen to cleanup hio resources.
1361  */
1362 void
1363 vnet_dds_cleanup_hio(vnet_t *vnetp)
1364 {
1365 	vdds_cleanup_hio(vnetp);
1366 }
1367 
1368 /*
1369  * vnet_handle_res_err -- A callback function called by a resource
1370  *	to report an error. For example, vgen can call to report
1371  *	an LDC down/reset event. This will trigger cleanup of associated
1372  *	Hybrid resource.
1373  */
1374 /* ARGSUSED */
1375 static void
1376 vnet_handle_res_err(vio_net_handle_t vrh, vio_net_err_val_t err)
1377 {
1378 	vnet_res_t *vresp = (vnet_res_t *)vrh;
1379 	vnet_t *vnetp = vresp->vnetp;
1380 
1381 	if (vnetp == NULL) {
1382 		return;
1383 	}
1384 	if ((vresp->type != VIO_NET_RES_LDC_SERVICE) &&
1385 	    (vresp->type != VIO_NET_RES_HYBRID)) {
1386 		return;
1387 	}
1388 
1389 	vdds_cleanup_hio(vnetp);
1390 }
1391 
1392 /*
1393  * vnet_dispatch_res_task -- A function to dispatch tasks start resources.
1394  */
1395 static void
1396 vnet_dispatch_res_task(vnet_t *vnetp)
1397 {
1398 	int rv;
1399 
1400 	/*
1401 	 * Dispatch the task. It could be the case that vnetp->flags does
1402 	 * not have VNET_STARTED set. This is ok as vnet_rest_start_task()
1403 	 * can abort the task when the task is started. See related comments
1404 	 * in vnet_m_stop() and vnet_stop_resources().
1405 	 */
1406 	rv = ddi_taskq_dispatch(vnetp->taskqp, vnet_res_start_task,
1407 	    vnetp, DDI_NOSLEEP);
1408 	if (rv != DDI_SUCCESS) {
1409 		cmn_err(CE_WARN,
1410 		    "vnet%d:Can't dispatch start resource task",
1411 		    vnetp->instance);
1412 	}
1413 }
1414 
1415 /*
1416  * vnet_res_start_task -- A taskq callback function that starts a resource.
1417  */
1418 static void
1419 vnet_res_start_task(void *arg)
1420 {
1421 	vnet_t *vnetp = arg;
1422 
1423 	WRITE_ENTER(&vnetp->vrwlock);
1424 	if (vnetp->flags & VNET_STARTED) {
1425 		vnet_start_resources(vnetp);
1426 	}
1427 	RW_EXIT(&vnetp->vrwlock);
1428 }
1429 
1430 /*
1431  * vnet_start_resources -- starts all resources associated with
1432  *	a vnet.
1433  */
1434 static void
1435 vnet_start_resources(vnet_t *vnetp)
1436 {
1437 	mac_register_t	*macp;
1438 	mac_callbacks_t	*cbp;
1439 	vnet_res_t	*vresp;
1440 	int rv;
1441 
1442 	DBG1(vnetp, "enter\n");
1443 
1444 	ASSERT(RW_WRITE_HELD(&vnetp->vrwlock));
1445 
1446 	for (vresp = vnetp->vres_list; vresp != NULL; vresp = vresp->nextp) {
1447 		/* skip if it is already started */
1448 		if (vresp->flags & VNET_STARTED) {
1449 			continue;
1450 		}
1451 		macp = &vresp->macreg;
1452 		cbp = macp->m_callbacks;
1453 		rv = cbp->mc_start(macp->m_driver);
1454 		if (rv == 0) {
1455 			/*
1456 			 * Successfully started the resource, so now
1457 			 * add it to the fdb.
1458 			 */
1459 			vresp->flags |= VNET_STARTED;
1460 			vnet_fdbe_add(vnetp, vresp);
1461 		}
1462 	}
1463 
1464 	DBG1(vnetp, "exit\n");
1465 
1466 }
1467 
1468 /*
1469  * vnet_stop_resources -- stop all resources associated with a vnet.
1470  */
1471 static void
1472 vnet_stop_resources(vnet_t *vnetp)
1473 {
1474 	vnet_res_t	*vresp;
1475 	mac_register_t	*macp;
1476 	mac_callbacks_t	*cbp;
1477 
1478 	DBG1(vnetp, "enter\n");
1479 
1480 	ASSERT(RW_WRITE_HELD(&vnetp->vrwlock));
1481 
1482 	for (vresp = vnetp->vres_list; vresp != NULL; ) {
1483 		if (vresp->flags & VNET_STARTED) {
1484 			/*
1485 			 * Release the lock while invoking mc_stop() of the
1486 			 * underlying resource. We hold a reference to this
1487 			 * resource to prevent being removed from the list in
1488 			 * vio_net_resource_unreg(). Note that new resources
1489 			 * can be added to the head of the list while the lock
1490 			 * is released, but they won't be started, as
1491 			 * VNET_STARTED flag has been cleared for the vnet
1492 			 * device in vnet_m_stop(). Also, while the lock is
1493 			 * released a resource could be removed from the list
1494 			 * in vio_net_resource_unreg(); but that is ok, as we
1495 			 * re-acquire the lock and only then access the forward
1496 			 * link (vresp->nextp) to continue with the next
1497 			 * resource.
1498 			 */
1499 			vresp->flags &= ~VNET_STARTED;
1500 			vresp->flags |= VNET_STOPPING;
1501 			macp = &vresp->macreg;
1502 			cbp = macp->m_callbacks;
1503 			VNET_FDBE_REFHOLD(vresp);
1504 			RW_EXIT(&vnetp->vrwlock);
1505 
1506 			cbp->mc_stop(macp->m_driver);
1507 
1508 			WRITE_ENTER(&vnetp->vrwlock);
1509 			vresp->flags &= ~VNET_STOPPING;
1510 			VNET_FDBE_REFRELE(vresp);
1511 		}
1512 		vresp = vresp->nextp;
1513 	}
1514 	DBG1(vnetp, "exit\n");
1515 }
1516 
1517 /*
1518  * Setup kstats for the HIO statistics.
1519  * NOTE: the synchronization for the statistics is the
1520  * responsibility of the caller.
1521  */
1522 kstat_t *
1523 vnet_hio_setup_kstats(char *ks_mod, char *ks_name, vnet_res_t *vresp)
1524 {
1525 	kstat_t *ksp;
1526 	vnet_t *vnetp = vresp->vnetp;
1527 	vnet_hio_kstats_t *hiokp;
1528 	size_t size;
1529 
1530 	ASSERT(vnetp != NULL);
1531 	size = sizeof (vnet_hio_kstats_t) / sizeof (kstat_named_t);
1532 	ksp = kstat_create(ks_mod, vnetp->instance, ks_name, "net",
1533 	    KSTAT_TYPE_NAMED, size, 0);
1534 	if (ksp == NULL) {
1535 		return (NULL);
1536 	}
1537 
1538 	hiokp = (vnet_hio_kstats_t *)ksp->ks_data;
1539 	kstat_named_init(&hiokp->ipackets,		"ipackets",
1540 	    KSTAT_DATA_ULONG);
1541 	kstat_named_init(&hiokp->ierrors,		"ierrors",
1542 	    KSTAT_DATA_ULONG);
1543 	kstat_named_init(&hiokp->opackets,		"opackets",
1544 	    KSTAT_DATA_ULONG);
1545 	kstat_named_init(&hiokp->oerrors,		"oerrors",
1546 	    KSTAT_DATA_ULONG);
1547 
1548 
1549 	/* MIB II kstat variables */
1550 	kstat_named_init(&hiokp->rbytes,		"rbytes",
1551 	    KSTAT_DATA_ULONG);
1552 	kstat_named_init(&hiokp->obytes,		"obytes",
1553 	    KSTAT_DATA_ULONG);
1554 	kstat_named_init(&hiokp->multircv,		"multircv",
1555 	    KSTAT_DATA_ULONG);
1556 	kstat_named_init(&hiokp->multixmt,		"multixmt",
1557 	    KSTAT_DATA_ULONG);
1558 	kstat_named_init(&hiokp->brdcstrcv,		"brdcstrcv",
1559 	    KSTAT_DATA_ULONG);
1560 	kstat_named_init(&hiokp->brdcstxmt,		"brdcstxmt",
1561 	    KSTAT_DATA_ULONG);
1562 	kstat_named_init(&hiokp->norcvbuf,		"norcvbuf",
1563 	    KSTAT_DATA_ULONG);
1564 	kstat_named_init(&hiokp->noxmtbuf,		"noxmtbuf",
1565 	    KSTAT_DATA_ULONG);
1566 
1567 	ksp->ks_update = vnet_hio_update_kstats;
1568 	ksp->ks_private = (void *)vresp;
1569 	kstat_install(ksp);
1570 	return (ksp);
1571 }
1572 
1573 /*
1574  * Destroy kstats.
1575  */
1576 static void
1577 vnet_hio_destroy_kstats(kstat_t *ksp)
1578 {
1579 	if (ksp != NULL)
1580 		kstat_delete(ksp);
1581 }
1582 
1583 /*
1584  * Update the kstats.
1585  */
1586 static int
1587 vnet_hio_update_kstats(kstat_t *ksp, int rw)
1588 {
1589 	vnet_t *vnetp;
1590 	vnet_res_t *vresp;
1591 	vnet_hio_stats_t statsp;
1592 	vnet_hio_kstats_t *hiokp;
1593 
1594 	vresp = (vnet_res_t *)ksp->ks_private;
1595 	vnetp = vresp->vnetp;
1596 
1597 	bzero(&statsp, sizeof (vnet_hio_stats_t));
1598 
1599 	READ_ENTER(&vnetp->vsw_fp_rw);
1600 	if (vnetp->hio_fp == NULL) {
1601 		/* not using hio resources, just return */
1602 		RW_EXIT(&vnetp->vsw_fp_rw);
1603 		return (0);
1604 	}
1605 	VNET_FDBE_REFHOLD(vnetp->hio_fp);
1606 	RW_EXIT(&vnetp->vsw_fp_rw);
1607 	vnet_hio_get_stats(vnetp->hio_fp, &statsp);
1608 	VNET_FDBE_REFRELE(vnetp->hio_fp);
1609 
1610 	hiokp = (vnet_hio_kstats_t *)ksp->ks_data;
1611 
1612 	if (rw == KSTAT_READ) {
1613 		/* Link Input/Output stats */
1614 		hiokp->ipackets.value.ul	= (uint32_t)statsp.ipackets;
1615 		hiokp->ipackets64.value.ull	= statsp.ipackets;
1616 		hiokp->ierrors.value.ul		= statsp.ierrors;
1617 		hiokp->opackets.value.ul	= (uint32_t)statsp.opackets;
1618 		hiokp->opackets64.value.ull	= statsp.opackets;
1619 		hiokp->oerrors.value.ul		= statsp.oerrors;
1620 
1621 		/* MIB II kstat variables */
1622 		hiokp->rbytes.value.ul		= (uint32_t)statsp.rbytes;
1623 		hiokp->rbytes64.value.ull	= statsp.rbytes;
1624 		hiokp->obytes.value.ul		= (uint32_t)statsp.obytes;
1625 		hiokp->obytes64.value.ull	= statsp.obytes;
1626 		hiokp->multircv.value.ul	= statsp.multircv;
1627 		hiokp->multixmt.value.ul	= statsp.multixmt;
1628 		hiokp->brdcstrcv.value.ul	= statsp.brdcstrcv;
1629 		hiokp->brdcstxmt.value.ul	= statsp.brdcstxmt;
1630 		hiokp->norcvbuf.value.ul	= statsp.norcvbuf;
1631 		hiokp->noxmtbuf.value.ul	= statsp.noxmtbuf;
1632 	} else {
1633 		return (EACCES);
1634 	}
1635 
1636 	return (0);
1637 }
1638 
1639 static void
1640 vnet_hio_get_stats(vnet_res_t *vresp, vnet_hio_stats_t *statsp)
1641 {
1642 	mac_register_t		*macp;
1643 	mac_callbacks_t		*cbp;
1644 	uint64_t		val;
1645 	int			stat;
1646 
1647 	/*
1648 	 * get the specified statistics from the underlying nxge.
1649 	 */
1650 	macp = &vresp->macreg;
1651 	cbp = macp->m_callbacks;
1652 	for (stat = MAC_STAT_MIN; stat < MAC_STAT_OVERFLOWS; stat++) {
1653 		if (cbp->mc_getstat(macp->m_driver, stat, &val) == 0) {
1654 			switch (stat) {
1655 			case MAC_STAT_IPACKETS:
1656 				statsp->ipackets = val;
1657 				break;
1658 
1659 			case MAC_STAT_IERRORS:
1660 				statsp->ierrors = val;
1661 				break;
1662 
1663 			case MAC_STAT_OPACKETS:
1664 				statsp->opackets = val;
1665 				break;
1666 
1667 			case MAC_STAT_OERRORS:
1668 				statsp->oerrors = val;
1669 				break;
1670 
1671 			case MAC_STAT_RBYTES:
1672 				statsp->rbytes = val;
1673 				break;
1674 
1675 			case MAC_STAT_OBYTES:
1676 				statsp->obytes = val;
1677 				break;
1678 
1679 			case MAC_STAT_MULTIRCV:
1680 				statsp->multircv = val;
1681 				break;
1682 
1683 			case MAC_STAT_MULTIXMT:
1684 				statsp->multixmt = val;
1685 				break;
1686 
1687 			case MAC_STAT_BRDCSTRCV:
1688 				statsp->brdcstrcv = val;
1689 				break;
1690 
1691 			case MAC_STAT_BRDCSTXMT:
1692 				statsp->brdcstxmt = val;
1693 				break;
1694 
1695 			case MAC_STAT_NOXMTBUF:
1696 				statsp->noxmtbuf = val;
1697 				break;
1698 
1699 			case MAC_STAT_NORCVBUF:
1700 				statsp->norcvbuf = val;
1701 				break;
1702 
1703 			default:
1704 				/*
1705 				 * parameters not interested.
1706 				 */
1707 				break;
1708 			}
1709 		}
1710 	}
1711 }
1712 
1713 #ifdef	VNET_IOC_DEBUG
1714 
1715 /*
1716  * The ioctl entry point is used only for debugging for now. The ioctl commands
1717  * can be used to force the link state of the channel connected to vsw.
1718  */
1719 static void
1720 vnet_m_ioctl(void *arg, queue_t *q, mblk_t *mp)
1721 {
1722 	struct iocblk	*iocp;
1723 	vnet_t		*vnetp;
1724 
1725 	iocp = (struct iocblk *)(uintptr_t)mp->b_rptr;
1726 	iocp->ioc_error = 0;
1727 	vnetp = (vnet_t *)arg;
1728 
1729 	if (vnetp == NULL) {
1730 		miocnak(q, mp, 0, EINVAL);
1731 		return;
1732 	}
1733 
1734 	switch (iocp->ioc_cmd) {
1735 
1736 	case VNET_FORCE_LINK_DOWN:
1737 	case VNET_FORCE_LINK_UP:
1738 		vnet_force_link_state(vnetp, q, mp);
1739 		break;
1740 
1741 	default:
1742 		iocp->ioc_error = EINVAL;
1743 		miocnak(q, mp, 0, iocp->ioc_error);
1744 		break;
1745 
1746 	}
1747 }
1748 
1749 static void
1750 vnet_force_link_state(vnet_t *vnetp, queue_t *q, mblk_t *mp)
1751 {
1752 	mac_register_t	*macp;
1753 	mac_callbacks_t	*cbp;
1754 	vnet_res_t	*vresp;
1755 
1756 	READ_ENTER(&vnetp->vsw_fp_rw);
1757 
1758 	vresp = vnetp->vsw_fp;
1759 	if (vresp == NULL) {
1760 		RW_EXIT(&vnetp->vsw_fp_rw);
1761 		return;
1762 	}
1763 
1764 	macp = &vresp->macreg;
1765 	cbp = macp->m_callbacks;
1766 	cbp->mc_ioctl(macp->m_driver, q, mp);
1767 
1768 	RW_EXIT(&vnetp->vsw_fp_rw);
1769 }
1770 
1771 #else
1772 
1773 static void
1774 vnet_m_ioctl(void *arg, queue_t *q, mblk_t *mp)
1775 {
1776 	vnet_t		*vnetp;
1777 
1778 	vnetp = (vnet_t *)arg;
1779 
1780 	if (vnetp == NULL) {
1781 		miocnak(q, mp, 0, EINVAL);
1782 		return;
1783 	}
1784 
1785 	/* ioctl support only for debugging */
1786 	miocnak(q, mp, 0, ENOTSUP);
1787 }
1788 
1789 #endif
1790