xref: /titanic_50/usr/src/uts/sun4v/io/vnet.c (revision b885580b43755ee4ea1e280b85428893d2ba9291)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #include <sys/types.h>
28 #include <sys/errno.h>
29 #include <sys/param.h>
30 #include <sys/stream.h>
31 #include <sys/kmem.h>
32 #include <sys/conf.h>
33 #include <sys/devops.h>
34 #include <sys/ksynch.h>
35 #include <sys/stat.h>
36 #include <sys/modctl.h>
37 #include <sys/modhash.h>
38 #include <sys/debug.h>
39 #include <sys/ethernet.h>
40 #include <sys/dlpi.h>
41 #include <net/if.h>
42 #include <sys/mac_provider.h>
43 #include <sys/mac_client.h>
44 #include <sys/mac_client_priv.h>
45 #include <sys/mac_ether.h>
46 #include <sys/ddi.h>
47 #include <sys/sunddi.h>
48 #include <sys/strsun.h>
49 #include <sys/note.h>
50 #include <sys/atomic.h>
51 #include <sys/vnet.h>
52 #include <sys/vlan.h>
53 #include <sys/vnet_mailbox.h>
54 #include <sys/vnet_common.h>
55 #include <sys/dds.h>
56 #include <sys/strsubr.h>
57 #include <sys/taskq.h>
58 
59 /*
60  * Function prototypes.
61  */
62 
63 /* DDI entrypoints */
64 static int vnetdevinfo(dev_info_t *, ddi_info_cmd_t, void *, void **);
65 static int vnetattach(dev_info_t *, ddi_attach_cmd_t);
66 static int vnetdetach(dev_info_t *, ddi_detach_cmd_t);
67 
68 /* MAC entrypoints  */
69 static int vnet_m_stat(void *, uint_t, uint64_t *);
70 static int vnet_m_start(void *);
71 static void vnet_m_stop(void *);
72 static int vnet_m_promisc(void *, boolean_t);
73 static int vnet_m_multicst(void *, boolean_t, const uint8_t *);
74 static int vnet_m_unicst(void *, const uint8_t *);
75 mblk_t *vnet_m_tx(void *, mblk_t *);
76 static void vnet_m_ioctl(void *arg, queue_t *q, mblk_t *mp);
77 #ifdef	VNET_IOC_DEBUG
78 static void vnet_force_link_state(vnet_t *vnetp, queue_t *q, mblk_t *mp);
79 #endif
80 static boolean_t vnet_m_capab(void *arg, mac_capab_t cap, void *cap_data);
81 static void vnet_get_ring(void *arg, mac_ring_type_t rtype, const int g_index,
82 	const int r_index, mac_ring_info_t *infop, mac_ring_handle_t r_handle);
83 static void vnet_get_group(void *arg, mac_ring_type_t type, const int index,
84 	mac_group_info_t *infop, mac_group_handle_t handle);
85 static int vnet_rx_ring_start(mac_ring_driver_t rdriver, uint64_t mr_gen_num);
86 static void vnet_rx_ring_stop(mac_ring_driver_t rdriver);
87 static int vnet_tx_ring_start(mac_ring_driver_t rdriver, uint64_t mr_gen_num);
88 static void vnet_tx_ring_stop(mac_ring_driver_t rdriver);
89 static int vnet_ring_enable_intr(void *arg);
90 static int vnet_ring_disable_intr(void *arg);
91 static mblk_t *vnet_rx_poll(void *arg, int bytes_to_pickup);
92 static int vnet_addmac(void *arg, const uint8_t *mac_addr);
93 static int vnet_remmac(void *arg, const uint8_t *mac_addr);
94 
95 /* vnet internal functions */
96 static int vnet_unattach(vnet_t *vnetp);
97 static void vnet_ring_grp_init(vnet_t *vnetp);
98 static void vnet_ring_grp_uninit(vnet_t *vnetp);
99 static int vnet_mac_register(vnet_t *);
100 static int vnet_read_mac_address(vnet_t *vnetp);
101 static int vnet_bind_vgenring(vnet_res_t *vresp);
102 static void vnet_unbind_vgenring(vnet_res_t *vresp);
103 static int vnet_bind_hwrings(vnet_t *vnetp);
104 static void vnet_unbind_hwrings(vnet_t *vnetp);
105 static int vnet_bind_rings(vnet_res_t *vresp);
106 static void vnet_unbind_rings(vnet_res_t *vresp);
107 static int vnet_hio_stat(void *, uint_t, uint64_t *);
108 static int vnet_hio_start(void *);
109 static void vnet_hio_stop(void *);
110 static void vnet_hio_notify_cb(void *arg, mac_notify_type_t type);
111 mblk_t *vnet_hio_tx(void *, mblk_t *);
112 
113 /* Forwarding database (FDB) routines */
114 static void vnet_fdb_create(vnet_t *vnetp);
115 static void vnet_fdb_destroy(vnet_t *vnetp);
116 static vnet_res_t *vnet_fdbe_find(vnet_t *vnetp, struct ether_addr *addrp);
117 static void vnet_fdbe_find_cb(mod_hash_key_t key, mod_hash_val_t val);
118 void vnet_fdbe_add(vnet_t *vnetp, vnet_res_t *vresp);
119 static void vnet_fdbe_del(vnet_t *vnetp, vnet_res_t *vresp);
120 
121 static void vnet_rx_frames_untag(uint16_t pvid, mblk_t **mp);
122 static void vnet_rx(vio_net_handle_t vrh, mblk_t *mp);
123 static void vnet_tx_update(vio_net_handle_t vrh);
124 static void vnet_res_start_task(void *arg);
125 static void vnet_start_resources(vnet_t *vnetp);
126 static void vnet_stop_resources(vnet_t *vnetp);
127 static void vnet_dispatch_res_task(vnet_t *vnetp);
128 static void vnet_res_start_task(void *arg);
129 static void vnet_handle_res_err(vio_net_handle_t vrh, vio_net_err_val_t err);
130 static void vnet_add_resource(vnet_t *vnetp, vnet_res_t *vresp);
131 static vnet_res_t *vnet_rem_resource(vnet_t *vnetp, vnet_res_t *vresp);
132 
133 /* Exported to vnet_gen */
134 int vnet_mtu_update(vnet_t *vnetp, uint32_t mtu);
135 void vnet_link_update(vnet_t *vnetp, link_state_t link_state);
136 void vnet_dds_cleanup_hio(vnet_t *vnetp);
137 
138 static kstat_t *vnet_hio_setup_kstats(char *ks_mod, char *ks_name,
139     vnet_res_t *vresp);
140 static int vnet_hio_update_kstats(kstat_t *ksp, int rw);
141 static void vnet_hio_get_stats(vnet_res_t *vresp, vnet_hio_stats_t *statsp);
142 static void vnet_hio_destroy_kstats(kstat_t *ksp);
143 
/* Exported to vnet_dds */
145 int vnet_send_dds_msg(vnet_t *vnetp, void *dmsg);
146 int vnet_hio_mac_init(vnet_t *vnetp, char *ifname);
147 void vnet_hio_mac_cleanup(vnet_t *vnetp);
148 
149 /* Externs that are imported from vnet_gen */
150 extern int vgen_init(void *vnetp, uint64_t regprop, dev_info_t *vnetdip,
151     const uint8_t *macaddr, void **vgenhdl);
152 extern int vgen_init_mdeg(void *arg);
153 extern void vgen_uninit(void *arg);
154 extern int vgen_dds_tx(void *arg, void *dmsg);
155 extern void vgen_mod_init(void);
156 extern int vgen_mod_cleanup(void);
157 extern void vgen_mod_fini(void);
158 extern int vgen_enable_intr(void *arg);
159 extern int vgen_disable_intr(void *arg);
160 extern mblk_t *vgen_poll(void *arg, int bytes_to_pickup);
161 
162 /* Externs that are imported from vnet_dds */
163 extern void vdds_mod_init(void);
164 extern void vdds_mod_fini(void);
165 extern int vdds_init(vnet_t *vnetp);
166 extern void vdds_cleanup(vnet_t *vnetp);
167 extern void vdds_process_dds_msg(vnet_t *vnetp, vio_dds_msg_t *dmsg);
168 extern void vdds_cleanup_hybrid_res(void *arg);
169 extern void vdds_cleanup_hio(vnet_t *vnetp);
170 
171 /* Externs imported from mac_impl */
172 extern mblk_t *mac_hwring_tx(mac_ring_handle_t, mblk_t *);
173 
174 #define	DRV_NAME	"vnet"
/*
 * Take a reference on the fdb entry (vnet_res_t) 'p' by atomically
 * incrementing its refcnt. The ASSERT catches a counter that wrapped to 0.
 * Wrapped in do/while(0) so the macro behaves as a single statement and can
 * be used safely in an unbraced if/else; callers must supply the trailing ';'.
 */
#define	VNET_FDBE_REFHOLD(p)						\
do {									\
	atomic_inc_32(&(p)->refcnt);					\
	ASSERT((p)->refcnt != 0);					\
	_NOTE(CONSTCOND)						\
} while (0)
180 
/*
 * Drop a reference on the fdb entry (vnet_res_t) 'p' by atomically
 * decrementing its refcnt. The ASSERT catches a release without a matching
 * hold. Wrapped in do/while(0) so the macro behaves as a single statement and
 * can be used safely in an unbraced if/else; callers must supply the
 * trailing ';'.
 */
#define	VNET_FDBE_REFRELE(p)						\
do {									\
	ASSERT((p)->refcnt != 0);					\
	atomic_dec_32(&(p)->refcnt);					\
	_NOTE(CONSTCOND)						\
} while (0)
186 
/*
 * Flags word placed in the first slot of vnet_m_callbacks. MC_GETCAPAB is
 * always set; MC_IOCTL is added only when the driver is built with
 * VNET_IOC_DEBUG.
 */
#ifdef	VNET_IOC_DEBUG
#define	VNET_M_CALLBACK_FLAGS	(MC_IOCTL | MC_GETCAPAB)
#else
#define	VNET_M_CALLBACK_FLAGS	(MC_GETCAPAB)
#endif
192 
/*
 * MAC callbacks for the vnet device itself; installed as m_callbacks in
 * vnet_mac_register(). The unicast and transmit slots are deliberately NULL
 * because rx/tx rings are exposed instead (see the per-entry notes).
 */
static mac_callbacks_t vnet_m_callbacks = {
	VNET_M_CALLBACK_FLAGS,
	vnet_m_stat,
	vnet_m_start,
	vnet_m_stop,
	vnet_m_promisc,
	vnet_m_multicst,
	NULL,	/* m_unicst entry must be NULL while rx rings are exposed */
	NULL,	/* m_tx entry must be NULL while tx rings are exposed */
	vnet_m_ioctl,
	vnet_m_capab,
	NULL
};
206 
/*
 * Callback table for the Hybrid I/O resource. No optional-callback flags are
 * set (first slot is 0); only the stat, start, stop and tx entry points are
 * provided, every other slot is NULL.
 * NOTE(review): where this table is installed is not visible in this part of
 * the file - presumably in the hybrid resource's macreg; confirm.
 */
static mac_callbacks_t vnet_hio_res_callbacks = {
	0,
	vnet_hio_stat,
	vnet_hio_start,
	vnet_hio_stop,
	NULL,
	NULL,
	NULL,
	vnet_hio_tx,
	NULL,
	NULL,
	NULL
};
220 
/*
 * Linked list of "vnet_t" structures - one per instance.
 * Instances are pushed on the head in vnetattach() and unlinked in
 * vnet_unattach(); both take vnet_rw as writer while modifying the list.
 */
static vnet_t	*vnet_headp = NULL;
static krwlock_t vnet_rw;
226 
/*
 * Tunables. These are non-static globals so that they can be patched; the
 * defaults come from the VNET_* constants defined in the vnet headers.
 */
uint32_t vnet_ntxds = VNET_NTXDS;	/* power of 2 transmit descriptors */
uint32_t vnet_ldcwd_interval = VNET_LDCWD_INTERVAL; /* watchdog freq in msec */
uint32_t vnet_ldcwd_txtimeout = VNET_LDCWD_TXTIMEOUT;  /* tx timeout in msec */
uint32_t vnet_ldc_mtu = VNET_LDC_MTU;		/* ldc mtu */

/*
 * Configure tx serialization in mac layer for the vnet device. When B_TRUE,
 * vnet_mac_register() adds MAC_VIRT_SERIALIZE to the registration.
 */
boolean_t vnet_mac_tx_serialize = B_TRUE;

/*
 * Set this to non-zero to enable additional internal receive buffer pools
 * based on the MTU of the device for better performance at the cost of more
 * memory consumption. This is turned off by default, to use allocb(9F) for
 * receive buffer allocations of sizes > 2K.
 */
boolean_t vnet_jumbo_rxpools = B_FALSE;

/* # of chains in fdb hash table */
uint32_t	vnet_fdb_nchains = VNET_NFDB_HASH;

/* Internal tunables */
uint32_t	vnet_ethermtu = 1500;	/* mtu of the device */

/*
 * Default vlan id. This is only used internally when the "default-vlan-id"
 * property is not present in the MD device node. Therefore, this should not be
 * used as a tunable; if this value is changed, the corresponding variable
 * should be updated to the same value in vsw and also other vnets connected to
 * the same vsw.
 */
uint16_t	vnet_default_vlan_id = 1;

/* delay in usec to wait for all references on a fdb entry to be dropped */
uint32_t vnet_fdbe_refcnt_delay = 10;

/* Ethernet broadcast address (all ones). */
static struct ether_addr etherbroadcastaddr = {
	0xff, 0xff, 0xff, 0xff, 0xff, 0xff
};

/* mac_open() retry delay in usec */
uint32_t vnet_mac_open_delay = 100;	/* 0.1 ms */

/* max # of mac_open() retries */
uint32_t vnet_mac_open_retries = 100;
271 
272 /*
273  * Property names
274  */
275 static char macaddr_propname[] = "local-mac-address";
276 
277 /*
278  * This is the string displayed by modinfo(1m).
279  */
280 static char vnet_ident[] = "vnet driver";
281 extern struct mod_ops mod_driverops;
/*
 * Character/block entry points for the driver. Open and close are no-ops
 * (nulldev); every data-path entry point is rejected with nodev. The driver
 * is flagged D_MP.
 */
static struct cb_ops cb_vnetops = {
	nulldev,		/* cb_open */
	nulldev,		/* cb_close */
	nodev,			/* cb_strategy */
	nodev,			/* cb_print */
	nodev,			/* cb_dump */
	nodev,			/* cb_read */
	nodev,			/* cb_write */
	nodev,			/* cb_ioctl */
	nodev,			/* cb_devmap */
	nodev,			/* cb_mmap */
	nodev,			/* cb_segmap */
	nochpoll,		/* cb_chpoll */
	ddi_prop_op,		/* cb_prop_op */
	NULL,			/* cb_stream */
	(int)(D_MP)		/* cb_flag */
};
299 
/*
 * Device operations for the driver. Only attach and detach are implemented
 * here; _init() passes this structure to mac_init_ops() before the module is
 * installed. Quiesce is explicitly not supported.
 */
static struct dev_ops vnetops = {
	DEVO_REV,		/* devo_rev */
	0,			/* devo_refcnt */
	NULL,			/* devo_getinfo */
	nulldev,		/* devo_identify */
	nulldev,		/* devo_probe */
	vnetattach,		/* devo_attach */
	vnetdetach,		/* devo_detach */
	nodev,			/* devo_reset */
	&cb_vnetops,		/* devo_cb_ops */
	(struct bus_ops *)NULL,	/* devo_bus_ops */
	NULL,			/* devo_power */
	ddi_quiesce_not_supported,	/* devo_quiesce */
};
314 
/* Module linkage element describing this module as a device driver. */
static struct modldrv modldrv = {
	&mod_driverops,		/* Type of module.  This one is a driver */
	vnet_ident,		/* ID string */
	&vnetops		/* driver specific ops */
};
320 
/*
 * Module linkage passed to mod_install(), mod_remove() and mod_info() by
 * _init(), _fini() and _info() respectively.
 */
static struct modlinkage modlinkage = {
	MODREV_1, (void *)&modldrv, NULL
};
324 
325 #ifdef DEBUG
326 
/*
 * Print debug messages - set to 0xf to enable all msgs.
 * Only present in DEBUG builds.
 * NOTE(review): the meaning of the individual bits is defined by the
 * DBG/DERR macros, which are not visible in this file - confirm there.
 */
int vnet_dbglevel = 0x8;
331 
332 static void
333 debug_printf(const char *fname, void *arg, const char *fmt, ...)
334 {
335 	char    buf[512];
336 	va_list ap;
337 	vnet_t *vnetp = (vnet_t *)arg;
338 	char    *bufp = buf;
339 
340 	if (vnetp == NULL) {
341 		(void) sprintf(bufp, "%s: ", fname);
342 		bufp += strlen(bufp);
343 	} else {
344 		(void) sprintf(bufp, "vnet%d:%s: ", vnetp->instance, fname);
345 		bufp += strlen(bufp);
346 	}
347 	va_start(ap, fmt);
348 	(void) vsprintf(bufp, fmt, ap);
349 	va_end(ap);
350 	cmn_err(CE_CONT, "%s\n", buf);
351 }
352 
353 #endif
354 
355 /* _init(9E): initialize the loadable module */
356 int
357 _init(void)
358 {
359 	int status;
360 
361 	DBG1(NULL, "enter\n");
362 
363 	mac_init_ops(&vnetops, "vnet");
364 	status = mod_install(&modlinkage);
365 	if (status != 0) {
366 		mac_fini_ops(&vnetops);
367 	}
368 	vdds_mod_init();
369 	vgen_mod_init();
370 	DBG1(NULL, "exit(%d)\n", status);
371 	return (status);
372 }
373 
374 /* _fini(9E): prepare the module for unloading. */
375 int
376 _fini(void)
377 {
378 	int		status;
379 
380 	DBG1(NULL, "enter\n");
381 
382 	status = vgen_mod_cleanup();
383 	if (status != 0)
384 		return (status);
385 
386 	status = mod_remove(&modlinkage);
387 	if (status != 0)
388 		return (status);
389 	mac_fini_ops(&vnetops);
390 	vgen_mod_fini();
391 	vdds_mod_fini();
392 
393 	DBG1(NULL, "exit(%d)\n", status);
394 	return (status);
395 }
396 
397 /* _info(9E): return information about the loadable module */
398 int
399 _info(struct modinfo *modinfop)
400 {
401 	return (mod_info(&modlinkage, modinfop));
402 }
403 
404 /*
405  * attach(9E): attach a device to the system.
406  * called once for each instance of the device on the system.
407  */
408 static int
409 vnetattach(dev_info_t *dip, ddi_attach_cmd_t cmd)
410 {
411 	vnet_t			*vnetp;
412 	int			status;
413 	int			instance;
414 	uint64_t		reg;
415 	char			qname[TASKQ_NAMELEN];
416 	vnet_attach_progress_t	attach_progress;
417 
418 	attach_progress = AST_init;
419 
420 	switch (cmd) {
421 	case DDI_ATTACH:
422 		break;
423 	case DDI_RESUME:
424 	case DDI_PM_RESUME:
425 	default:
426 		goto vnet_attach_fail;
427 	}
428 
429 	instance = ddi_get_instance(dip);
430 	DBG1(NULL, "instance(%d) enter\n", instance);
431 
432 	/* allocate vnet_t and mac_t structures */
433 	vnetp = kmem_zalloc(sizeof (vnet_t), KM_SLEEP);
434 	vnetp->dip = dip;
435 	vnetp->instance = instance;
436 	rw_init(&vnetp->vrwlock, NULL, RW_DRIVER, NULL);
437 	rw_init(&vnetp->vsw_fp_rw, NULL, RW_DRIVER, NULL);
438 	attach_progress |= AST_vnet_alloc;
439 
440 	vnet_ring_grp_init(vnetp);
441 	attach_progress |= AST_ring_init;
442 
443 	status = vdds_init(vnetp);
444 	if (status != 0) {
445 		goto vnet_attach_fail;
446 	}
447 	attach_progress |= AST_vdds_init;
448 
449 	/* setup links to vnet_t from both devinfo and mac_t */
450 	ddi_set_driver_private(dip, (caddr_t)vnetp);
451 
452 	/* read the mac address */
453 	status = vnet_read_mac_address(vnetp);
454 	if (status != DDI_SUCCESS) {
455 		goto vnet_attach_fail;
456 	}
457 	attach_progress |= AST_read_macaddr;
458 
459 	reg = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
460 	    DDI_PROP_DONTPASS, "reg", -1);
461 	if (reg == -1) {
462 		goto vnet_attach_fail;
463 	}
464 	vnetp->reg = reg;
465 
466 	vnet_fdb_create(vnetp);
467 	attach_progress |= AST_fdbh_alloc;
468 
469 	(void) snprintf(qname, TASKQ_NAMELEN, "vnet_taskq%d", instance);
470 	if ((vnetp->taskqp = ddi_taskq_create(dip, qname, 1,
471 	    TASKQ_DEFAULTPRI, 0)) == NULL) {
472 		cmn_err(CE_WARN, "!vnet%d: Unable to create task queue",
473 		    instance);
474 		goto vnet_attach_fail;
475 	}
476 	attach_progress |= AST_taskq_create;
477 
478 	/* add to the list of vnet devices */
479 	WRITE_ENTER(&vnet_rw);
480 	vnetp->nextp = vnet_headp;
481 	vnet_headp = vnetp;
482 	RW_EXIT(&vnet_rw);
483 
484 	attach_progress |= AST_vnet_list;
485 
486 	/*
487 	 * Initialize the generic vnet plugin which provides communication via
488 	 * sun4v LDC (logical domain channel) based resources. This involves 2
489 	 * steps; first, vgen_init() is invoked to read the various properties
490 	 * of the vnet device from its MD node (including its mtu which is
491 	 * needed to mac_register()) and obtain a handle to the vgen layer.
492 	 * After mac_register() is done and we have a mac handle, we then
493 	 * invoke vgen_init_mdeg() which registers with the the MD event
494 	 * generator (mdeg) framework to allow LDC resource notifications.
495 	 * Note: this sequence also allows us to report the correct default #
496 	 * of pseudo rings (2TX and 3RX) in vnet_m_capab() which gets invoked
497 	 * in the context of mac_register(); and avoids conflicting with
498 	 * dynamic pseudo rx rings which get added/removed as a result of mdeg
499 	 * events in vgen.
500 	 */
501 	status = vgen_init(vnetp, reg, vnetp->dip,
502 	    (uint8_t *)vnetp->curr_macaddr, &vnetp->vgenhdl);
503 	if (status != DDI_SUCCESS) {
504 		DERR(vnetp, "vgen_init() failed\n");
505 		goto vnet_attach_fail;
506 	}
507 	attach_progress |= AST_vgen_init;
508 
509 	status = vnet_mac_register(vnetp);
510 	if (status != DDI_SUCCESS) {
511 		goto vnet_attach_fail;
512 	}
513 	vnetp->link_state = LINK_STATE_UNKNOWN;
514 	attach_progress |= AST_macreg;
515 
516 	status = vgen_init_mdeg(vnetp->vgenhdl);
517 	if (status != DDI_SUCCESS) {
518 		goto vnet_attach_fail;
519 	}
520 	attach_progress |= AST_init_mdeg;
521 
522 	vnetp->attach_progress = attach_progress;
523 
524 	DBG1(NULL, "instance(%d) exit\n", instance);
525 	return (DDI_SUCCESS);
526 
527 vnet_attach_fail:
528 	vnetp->attach_progress = attach_progress;
529 	status = vnet_unattach(vnetp);
530 	ASSERT(status == 0);
531 	return (DDI_FAILURE);
532 }
533 
534 /*
535  * detach(9E): detach a device from the system.
536  */
537 static int
538 vnetdetach(dev_info_t *dip, ddi_detach_cmd_t cmd)
539 {
540 	vnet_t		*vnetp;
541 	int		instance;
542 
543 	instance = ddi_get_instance(dip);
544 	DBG1(NULL, "instance(%d) enter\n", instance);
545 
546 	vnetp = ddi_get_driver_private(dip);
547 	if (vnetp == NULL) {
548 		goto vnet_detach_fail;
549 	}
550 
551 	switch (cmd) {
552 	case DDI_DETACH:
553 		break;
554 	case DDI_SUSPEND:
555 	case DDI_PM_SUSPEND:
556 	default:
557 		goto vnet_detach_fail;
558 	}
559 
560 	if (vnet_unattach(vnetp) != 0) {
561 		goto vnet_detach_fail;
562 	}
563 
564 	return (DDI_SUCCESS);
565 
566 vnet_detach_fail:
567 	return (DDI_FAILURE);
568 }
569 
570 /*
571  * Common routine to handle vnetattach() failure and vnetdetach(). Note that
572  * the only reason this function could fail is if mac_unregister() fails.
573  * Otherwise, this function must ensure that all resources are freed and return
574  * success.
575  */
576 static int
577 vnet_unattach(vnet_t *vnetp)
578 {
579 	vnet_attach_progress_t	attach_progress;
580 
581 	attach_progress = vnetp->attach_progress;
582 
583 	/*
584 	 * Disable the mac device in the gldv3 subsystem. This can fail, in
585 	 * particular if there are still any open references to this mac
586 	 * device; in which case we just return failure without continuing to
587 	 * detach further.
588 	 * If it succeeds, we then invoke vgen_uninit() which should unregister
589 	 * any pseudo rings registered with the mac layer. Note we keep the
590 	 * AST_macreg flag on, so we can unregister with the mac layer at
591 	 * the end of this routine.
592 	 */
593 	if (attach_progress & AST_macreg) {
594 		if (mac_disable(vnetp->mh) != 0) {
595 			return (1);
596 		}
597 	}
598 
599 	/*
600 	 * Now that we have disabled the device, we must finish all other steps
601 	 * and successfully return from this function; otherwise we will end up
602 	 * leaving the device in a broken/unusable state.
603 	 *
604 	 * First, release any hybrid resources assigned to this vnet device.
605 	 */
606 	if (attach_progress & AST_vdds_init) {
607 		vdds_cleanup(vnetp);
608 		attach_progress &= ~AST_vdds_init;
609 	}
610 
611 	/*
612 	 * Uninit vgen. This stops further mdeg callbacks to this vnet
613 	 * device and/or its ports; and detaches any existing ports.
614 	 */
615 	if (attach_progress & (AST_vgen_init|AST_init_mdeg)) {
616 		vgen_uninit(vnetp->vgenhdl);
617 		attach_progress &= ~AST_vgen_init;
618 		attach_progress &= ~AST_init_mdeg;
619 	}
620 
621 	/* Destroy the taskq. */
622 	if (attach_progress & AST_taskq_create) {
623 		ddi_taskq_destroy(vnetp->taskqp);
624 		attach_progress &= ~AST_taskq_create;
625 	}
626 
627 	/* Destroy fdb. */
628 	if (attach_progress & AST_fdbh_alloc) {
629 		vnet_fdb_destroy(vnetp);
630 		attach_progress &= ~AST_fdbh_alloc;
631 	}
632 
633 	/* Remove from the device list */
634 	if (attach_progress & AST_vnet_list) {
635 		vnet_t		**vnetpp;
636 		/* unlink from instance(vnet_t) list */
637 		WRITE_ENTER(&vnet_rw);
638 		for (vnetpp = &vnet_headp; *vnetpp;
639 		    vnetpp = &(*vnetpp)->nextp) {
640 			if (*vnetpp == vnetp) {
641 				*vnetpp = vnetp->nextp;
642 				break;
643 			}
644 		}
645 		RW_EXIT(&vnet_rw);
646 		attach_progress &= ~AST_vnet_list;
647 	}
648 
649 	if (attach_progress & AST_ring_init) {
650 		vnet_ring_grp_uninit(vnetp);
651 		attach_progress &= ~AST_ring_init;
652 	}
653 
654 	if (attach_progress & AST_macreg) {
655 		VERIFY(mac_unregister(vnetp->mh) == 0);
656 		vnetp->mh = NULL;
657 		attach_progress &= ~AST_macreg;
658 	}
659 
660 	if (attach_progress & AST_vnet_alloc) {
661 		rw_destroy(&vnetp->vrwlock);
662 		rw_destroy(&vnetp->vsw_fp_rw);
663 		attach_progress &= ~AST_vnet_list;
664 		KMEM_FREE(vnetp);
665 	}
666 
667 	return (0);
668 }
669 
670 /* enable the device for transmit/receive */
671 static int
672 vnet_m_start(void *arg)
673 {
674 	vnet_t		*vnetp = arg;
675 
676 	DBG1(vnetp, "enter\n");
677 
678 	WRITE_ENTER(&vnetp->vrwlock);
679 	vnetp->flags |= VNET_STARTED;
680 	vnet_start_resources(vnetp);
681 	RW_EXIT(&vnetp->vrwlock);
682 
683 	DBG1(vnetp, "exit\n");
684 	return (VNET_SUCCESS);
685 
686 }
687 
688 /* stop transmit/receive for the device */
689 static void
690 vnet_m_stop(void *arg)
691 {
692 	vnet_t		*vnetp = arg;
693 
694 	DBG1(vnetp, "enter\n");
695 
696 	WRITE_ENTER(&vnetp->vrwlock);
697 	if (vnetp->flags & VNET_STARTED) {
698 		/*
699 		 * Set the flags appropriately; this should prevent starting of
700 		 * any new resources that are added(see vnet_res_start_task()),
701 		 * while we release the vrwlock in vnet_stop_resources() before
702 		 * stopping each resource.
703 		 */
704 		vnetp->flags &= ~VNET_STARTED;
705 		vnetp->flags |= VNET_STOPPING;
706 		vnet_stop_resources(vnetp);
707 		vnetp->flags &= ~VNET_STOPPING;
708 	}
709 	RW_EXIT(&vnetp->vrwlock);
710 
711 	DBG1(vnetp, "exit\n");
712 }
713 
714 /* set the unicast mac address of the device */
715 static int
716 vnet_m_unicst(void *arg, const uint8_t *macaddr)
717 {
718 	_NOTE(ARGUNUSED(macaddr))
719 
720 	vnet_t *vnetp = arg;
721 
722 	DBG1(vnetp, "enter\n");
723 	/*
724 	 * NOTE: setting mac address dynamically is not supported.
725 	 */
726 	DBG1(vnetp, "exit\n");
727 
728 	return (VNET_FAILURE);
729 }
730 
731 /* enable/disable a multicast address */
732 static int
733 vnet_m_multicst(void *arg, boolean_t add, const uint8_t *mca)
734 {
735 	_NOTE(ARGUNUSED(add, mca))
736 
737 	vnet_t *vnetp = arg;
738 	vnet_res_t	*vresp;
739 	mac_register_t	*macp;
740 	mac_callbacks_t	*cbp;
741 	int rv = VNET_SUCCESS;
742 
743 	DBG1(vnetp, "enter\n");
744 
745 	READ_ENTER(&vnetp->vrwlock);
746 	for (vresp = vnetp->vres_list; vresp != NULL; vresp = vresp->nextp) {
747 		if (vresp->type == VIO_NET_RES_LDC_SERVICE) {
748 			macp = &vresp->macreg;
749 			cbp = macp->m_callbacks;
750 			rv = cbp->mc_multicst(macp->m_driver, add, mca);
751 		}
752 	}
753 	RW_EXIT(&vnetp->vrwlock);
754 
755 	DBG1(vnetp, "exit(%d)\n", rv);
756 	return (rv);
757 }
758 
759 /* set or clear promiscuous mode on the device */
760 static int
761 vnet_m_promisc(void *arg, boolean_t on)
762 {
763 	_NOTE(ARGUNUSED(on))
764 
765 	vnet_t *vnetp = arg;
766 	DBG1(vnetp, "enter\n");
767 	/*
768 	 * NOTE: setting promiscuous mode is not supported, just return success.
769 	 */
770 	DBG1(vnetp, "exit\n");
771 	return (VNET_SUCCESS);
772 }
773 
/*
 * Transmit a chain of packets. This function provides switching functionality
 * based on the destination mac address to reach other guests (within ldoms) or
 * external hosts.
 *
 * arg - the pseudo tx ring (vnet_pseudo_tx_ring_t) the chain is sent on.
 * mp  - chain of packets linked through b_next; must not be NULL.
 *
 * Each packet is unlinked from the chain and handed to the tx callback of the
 * resource selected for its destination. Returns NULL when the whole chain
 * has been consumed; if a tx callback reports failure, the current packet is
 * re-linked to the unsent remainder and that chain is returned.
 */
mblk_t *
vnet_tx_ring_send(void *arg, mblk_t *mp)
{
	vnet_pseudo_tx_ring_t	*tx_ringp;
	vnet_t			*vnetp;
	vnet_res_t		*vresp;
	mblk_t			*next;
	mblk_t			*resid_mp;
	mac_register_t		*macp;
	struct ether_header	*ehp;
	boolean_t		is_unicast;
	boolean_t		is_pvid;	/* non-default pvid ? */
	boolean_t		hres;		/* Hybrid resource ? */
	void			*tx_arg;

	tx_ringp = (vnet_pseudo_tx_ring_t *)arg;
	vnetp = (vnet_t *)tx_ringp->vnetp;
	DBG1(vnetp, "enter\n");
	ASSERT(mp != NULL);

	is_pvid = (vnetp->pvid != vnetp->default_vlan_id) ? B_TRUE : B_FALSE;

	while (mp != NULL) {

		/* detach the current packet from the rest of the chain */
		next = mp->b_next;
		mp->b_next = NULL;

		/*
		 * Find fdb entry for the destination
		 * and hold a reference to it.
		 */
		ehp = (struct ether_header *)mp->b_rptr;
		vresp = vnet_fdbe_find(vnetp, &ehp->ether_dhost);
		if (vresp != NULL) {

			/*
			 * Destination found in FDB.
			 * The destination is a vnet device within ldoms
			 * and directly reachable, invoke the tx function
			 * in the fdb entry.
			 */
			macp = &vresp->macreg;
			resid_mp = macp->m_callbacks->mc_tx(macp->m_driver, mp);

			/* tx done; now release ref on fdb entry */
			VNET_FDBE_REFRELE(vresp);

			if (resid_mp != NULL) {
				/* m_tx failed; hand back the unsent chain */
				mp->b_next = next;
				break;
			}
		} else {
			is_unicast = !(IS_BROADCAST(ehp) ||
			    (IS_MULTICAST(ehp)));
			/*
			 * Destination is not in FDB.
			 * If the destination is broadcast or multicast,
			 * then forward the packet to vswitch.
			 * If a Hybrid resource is available, then send the
			 * unicast packet via hybrid resource, otherwise
			 * forward it to vswitch.
			 */
			READ_ENTER(&vnetp->vsw_fp_rw);

			if ((is_unicast) && (vnetp->hio_fp != NULL)) {
				vresp = vnetp->hio_fp;
				hres = B_TRUE;
			} else {
				vresp = vnetp->vsw_fp;
				hres = B_FALSE;
			}
			if (vresp == NULL) {
				/*
				 * no fdb entry to vsw? drop the packet.
				 */
				RW_EXIT(&vnetp->vsw_fp_rw);
				freemsg(mp);
				mp = next;
				continue;
			}

			/*
			 * ref hold the fdb entry to vsw; taken while
			 * vsw_fp_rw is still held so the entry stays
			 * referenced after the lock is dropped.
			 */
			VNET_FDBE_REFHOLD(vresp);

			RW_EXIT(&vnetp->vsw_fp_rw);

			/*
			 * In the case of a hybrid resource we need to insert
			 * the tag for the pvid case here; unlike packets that
			 * are destined to a vnet/vsw in which case the vgen
			 * layer does the tagging before sending it over ldc.
			 */
			if (hres == B_TRUE) {
				/*
				 * Determine if the frame being transmitted
				 * over the hybrid resource is untagged. If so,
				 * insert the tag before transmitting.
				 */
				if (is_pvid == B_TRUE &&
				    ehp->ether_type != htons(ETHERTYPE_VLAN)) {

					mp = vnet_vlan_insert_tag(mp,
					    vnetp->pvid);
					if (mp == NULL) {
						/*
						 * Tagging failed and there is
						 * no packet left to send; drop
						 * the ref and move on.
						 */
						VNET_FDBE_REFRELE(vresp);
						mp = next;
						continue;
					}

				}

				/* the hybrid tx callback takes the tx ring */
				macp = &vresp->macreg;
				tx_arg = tx_ringp;
			} else {
				macp = &vresp->macreg;
				tx_arg = macp->m_driver;
			}
			resid_mp = macp->m_callbacks->mc_tx(tx_arg, mp);

			/* tx done; now release ref on fdb entry */
			VNET_FDBE_REFRELE(vresp);

			if (resid_mp != NULL) {
				/* m_tx failed; hand back the unsent chain */
				mp->b_next = next;
				break;
			}
		}

		mp = next;
	}

	DBG1(vnetp, "exit\n");
	return (mp);
}
915 
916 /* get statistics from the device */
917 int
918 vnet_m_stat(void *arg, uint_t stat, uint64_t *val)
919 {
920 	vnet_t *vnetp = arg;
921 	vnet_res_t	*vresp;
922 	mac_register_t	*macp;
923 	mac_callbacks_t	*cbp;
924 	uint64_t val_total = 0;
925 
926 	DBG1(vnetp, "enter\n");
927 
928 	/*
929 	 * get the specified statistic from each transport and return the
930 	 * aggregate val.  This obviously only works for counters.
931 	 */
932 	if ((IS_MAC_STAT(stat) && !MAC_STAT_ISACOUNTER(stat)) ||
933 	    (IS_MACTYPE_STAT(stat) && !ETHER_STAT_ISACOUNTER(stat))) {
934 		return (ENOTSUP);
935 	}
936 
937 	READ_ENTER(&vnetp->vrwlock);
938 	for (vresp = vnetp->vres_list; vresp != NULL; vresp = vresp->nextp) {
939 		macp = &vresp->macreg;
940 		cbp = macp->m_callbacks;
941 		if (cbp->mc_getstat(macp->m_driver, stat, val) == 0)
942 			val_total += *val;
943 	}
944 	RW_EXIT(&vnetp->vrwlock);
945 
946 	*val = val_total;
947 
948 	DBG1(vnetp, "exit\n");
949 	return (0);
950 }
951 
952 static void
953 vnet_ring_grp_init(vnet_t *vnetp)
954 {
955 	vnet_pseudo_rx_group_t	*rx_grp;
956 	vnet_pseudo_rx_ring_t	*rx_ringp;
957 	vnet_pseudo_tx_group_t	*tx_grp;
958 	vnet_pseudo_tx_ring_t	*tx_ringp;
959 	int			i;
960 
961 	tx_grp = &vnetp->tx_grp[0];
962 	tx_ringp = kmem_zalloc(sizeof (vnet_pseudo_tx_ring_t) *
963 	    VNET_NUM_PSEUDO_TXRINGS, KM_SLEEP);
964 	for (i = 0; i < VNET_NUM_PSEUDO_TXRINGS; i++) {
965 		tx_ringp[i].state |= VNET_TXRING_SHARED;
966 	}
967 	tx_grp->rings = tx_ringp;
968 	tx_grp->ring_cnt = VNET_NUM_PSEUDO_TXRINGS;
969 
970 	rx_grp = &vnetp->rx_grp[0];
971 	rx_grp->max_ring_cnt = MAX_RINGS_PER_GROUP;
972 	rw_init(&rx_grp->lock, NULL, RW_DRIVER, NULL);
973 	rx_ringp = kmem_zalloc(sizeof (vnet_pseudo_rx_ring_t) *
974 	    rx_grp->max_ring_cnt, KM_SLEEP);
975 
976 	/*
977 	 * Setup the first 3 Pseudo RX Rings that are reserved;
978 	 * 1 for LDC resource to vswitch + 2 for RX rings of Hybrid resource.
979 	 */
980 	rx_ringp[0].state |= VNET_RXRING_INUSE|VNET_RXRING_LDC_SERVICE;
981 	rx_ringp[0].index = 0;
982 	rx_ringp[1].state |= VNET_RXRING_INUSE|VNET_RXRING_HYBRID;
983 	rx_ringp[1].index = 1;
984 	rx_ringp[2].state |= VNET_RXRING_INUSE|VNET_RXRING_HYBRID;
985 	rx_ringp[2].index = 2;
986 
987 	rx_grp->ring_cnt = VNET_NUM_PSEUDO_RXRINGS_DEFAULT;
988 	rx_grp->rings = rx_ringp;
989 
990 	for (i = VNET_NUM_PSEUDO_RXRINGS_DEFAULT;
991 	    i < rx_grp->max_ring_cnt; i++) {
992 		rx_ringp = &rx_grp->rings[i];
993 		rx_ringp->state = VNET_RXRING_FREE;
994 		rx_ringp->index = i;
995 	}
996 }
997 
998 static void
999 vnet_ring_grp_uninit(vnet_t *vnetp)
1000 {
1001 	vnet_pseudo_rx_group_t	*rx_grp;
1002 	vnet_pseudo_tx_group_t	*tx_grp;
1003 
1004 	tx_grp = &vnetp->tx_grp[0];
1005 	if (tx_grp->rings != NULL) {
1006 		ASSERT(tx_grp->ring_cnt == VNET_NUM_PSEUDO_TXRINGS);
1007 		kmem_free(tx_grp->rings, sizeof (vnet_pseudo_tx_ring_t) *
1008 		    tx_grp->ring_cnt);
1009 		tx_grp->rings = NULL;
1010 	}
1011 
1012 	rx_grp = &vnetp->rx_grp[0];
1013 	if (rx_grp->rings != NULL) {
1014 		ASSERT(rx_grp->max_ring_cnt == MAX_RINGS_PER_GROUP);
1015 		ASSERT(rx_grp->ring_cnt == VNET_NUM_PSEUDO_RXRINGS_DEFAULT);
1016 		kmem_free(rx_grp->rings, sizeof (vnet_pseudo_rx_ring_t) *
1017 		    rx_grp->max_ring_cnt);
1018 		rx_grp->rings = NULL;
1019 	}
1020 }
1021 
1022 static vnet_pseudo_rx_ring_t *
1023 vnet_alloc_pseudo_rx_ring(vnet_t *vnetp)
1024 {
1025 	vnet_pseudo_rx_group_t  *rx_grp;
1026 	vnet_pseudo_rx_ring_t	*rx_ringp;
1027 	int			index;
1028 
1029 	rx_grp = &vnetp->rx_grp[0];
1030 	WRITE_ENTER(&rx_grp->lock);
1031 
1032 	if (rx_grp->ring_cnt == rx_grp->max_ring_cnt) {
1033 		/* no rings available */
1034 		RW_EXIT(&rx_grp->lock);
1035 		return (NULL);
1036 	}
1037 
1038 	for (index = VNET_NUM_PSEUDO_RXRINGS_DEFAULT;
1039 	    index < rx_grp->max_ring_cnt; index++) {
1040 		rx_ringp = &rx_grp->rings[index];
1041 		if (rx_ringp->state == VNET_RXRING_FREE) {
1042 			rx_ringp->state |= VNET_RXRING_INUSE;
1043 			rx_grp->ring_cnt++;
1044 			break;
1045 		}
1046 	}
1047 
1048 	RW_EXIT(&rx_grp->lock);
1049 	return (rx_ringp);
1050 }
1051 
1052 static void
1053 vnet_free_pseudo_rx_ring(vnet_t *vnetp, vnet_pseudo_rx_ring_t *ringp)
1054 {
1055 	vnet_pseudo_rx_group_t  *rx_grp;
1056 
1057 	ASSERT(ringp->index >= VNET_NUM_PSEUDO_RXRINGS_DEFAULT);
1058 	rx_grp = &vnetp->rx_grp[0];
1059 	WRITE_ENTER(&rx_grp->lock);
1060 
1061 	if (ringp->state != VNET_RXRING_FREE) {
1062 		ringp->state = VNET_RXRING_FREE;
1063 		ringp->handle = NULL;
1064 		rx_grp->ring_cnt--;
1065 	}
1066 
1067 	RW_EXIT(&rx_grp->lock);
1068 }
1069 
1070 /* wrapper function for mac_register() */
1071 static int
1072 vnet_mac_register(vnet_t *vnetp)
1073 {
1074 	mac_register_t	*macp;
1075 	int		err;
1076 
1077 	if ((macp = mac_alloc(MAC_VERSION)) == NULL)
1078 		return (DDI_FAILURE);
1079 	macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
1080 	macp->m_driver = vnetp;
1081 	macp->m_dip = vnetp->dip;
1082 	macp->m_src_addr = vnetp->curr_macaddr;
1083 	macp->m_callbacks = &vnet_m_callbacks;
1084 	macp->m_min_sdu = 0;
1085 	macp->m_max_sdu = vnetp->mtu;
1086 	macp->m_margin = VLAN_TAGSZ;
1087 
1088 	/*
1089 	 * MAC_VIRT_SERIALIZE flag is needed while hybridIO is enabled to
1090 	 * workaround tx lock contention issues in nxge.
1091 	 */
1092 	macp->m_v12n = MAC_VIRT_LEVEL1;
1093 	if (vnet_mac_tx_serialize == B_TRUE) {
1094 		macp->m_v12n |= MAC_VIRT_SERIALIZE;
1095 	}
1096 
1097 	/*
1098 	 * Finally, we're ready to register ourselves with the MAC layer
1099 	 * interface; if this succeeds, we're all ready to start()
1100 	 */
1101 	err = mac_register(macp, &vnetp->mh);
1102 	mac_free(macp);
1103 	return (err == 0 ? DDI_SUCCESS : DDI_FAILURE);
1104 }
1105 
1106 /* read the mac address of the device */
1107 static int
1108 vnet_read_mac_address(vnet_t *vnetp)
1109 {
1110 	uchar_t 	*macaddr;
1111 	uint32_t 	size;
1112 	int 		rv;
1113 
1114 	rv = ddi_prop_lookup_byte_array(DDI_DEV_T_ANY, vnetp->dip,
1115 	    DDI_PROP_DONTPASS, macaddr_propname, &macaddr, &size);
1116 	if ((rv != DDI_PROP_SUCCESS) || (size != ETHERADDRL)) {
1117 		DWARN(vnetp, "prop_lookup failed(%s) err(%d)\n",
1118 		    macaddr_propname, rv);
1119 		return (DDI_FAILURE);
1120 	}
1121 	bcopy(macaddr, (caddr_t)vnetp->vendor_addr, ETHERADDRL);
1122 	bcopy(macaddr, (caddr_t)vnetp->curr_macaddr, ETHERADDRL);
1123 	ddi_prop_free(macaddr);
1124 
1125 	return (DDI_SUCCESS);
1126 }
1127 
1128 static void
1129 vnet_fdb_create(vnet_t *vnetp)
1130 {
1131 	char		hashname[MAXNAMELEN];
1132 
1133 	(void) snprintf(hashname, MAXNAMELEN, "vnet%d-fdbhash",
1134 	    vnetp->instance);
1135 	vnetp->fdb_nchains = vnet_fdb_nchains;
1136 	vnetp->fdb_hashp = mod_hash_create_ptrhash(hashname, vnetp->fdb_nchains,
1137 	    mod_hash_null_valdtor, sizeof (void *));
1138 }
1139 
1140 static void
1141 vnet_fdb_destroy(vnet_t *vnetp)
1142 {
1143 	/* destroy fdb-hash-table */
1144 	if (vnetp->fdb_hashp != NULL) {
1145 		mod_hash_destroy_hash(vnetp->fdb_hashp);
1146 		vnetp->fdb_hashp = NULL;
1147 		vnetp->fdb_nchains = 0;
1148 	}
1149 }
1150 
1151 /*
1152  * Add an entry into the fdb.
1153  */
1154 void
1155 vnet_fdbe_add(vnet_t *vnetp, vnet_res_t *vresp)
1156 {
1157 	uint64_t	addr = 0;
1158 	int		rv;
1159 
1160 	KEY_HASH(addr, vresp->rem_macaddr);
1161 
1162 	/*
1163 	 * If the entry being added corresponds to LDC_SERVICE resource,
1164 	 * that is, vswitch connection, it is added to the hash and also
1165 	 * the entry is cached, an additional reference count reflects
1166 	 * this. The HYBRID resource is not added to the hash, but only
1167 	 * cached, as it is only used for sending out packets for unknown
1168 	 * unicast destinations.
1169 	 */
1170 	(vresp->type == VIO_NET_RES_LDC_SERVICE) ?
1171 	    (vresp->refcnt = 1) : (vresp->refcnt = 0);
1172 
1173 	/*
1174 	 * Note: duplicate keys will be rejected by mod_hash.
1175 	 */
1176 	if (vresp->type != VIO_NET_RES_HYBRID) {
1177 		rv = mod_hash_insert(vnetp->fdb_hashp, (mod_hash_key_t)addr,
1178 		    (mod_hash_val_t)vresp);
1179 		if (rv != 0) {
1180 			DWARN(vnetp, "Duplicate macaddr key(%lx)\n", addr);
1181 			return;
1182 		}
1183 	}
1184 
1185 	if (vresp->type == VIO_NET_RES_LDC_SERVICE) {
1186 		/* Cache the fdb entry to vsw-port */
1187 		WRITE_ENTER(&vnetp->vsw_fp_rw);
1188 		if (vnetp->vsw_fp == NULL)
1189 			vnetp->vsw_fp = vresp;
1190 		RW_EXIT(&vnetp->vsw_fp_rw);
1191 	} else if (vresp->type == VIO_NET_RES_HYBRID) {
1192 		/* Cache the fdb entry to hybrid resource */
1193 		WRITE_ENTER(&vnetp->vsw_fp_rw);
1194 		if (vnetp->hio_fp == NULL)
1195 			vnetp->hio_fp = vresp;
1196 		RW_EXIT(&vnetp->vsw_fp_rw);
1197 	}
1198 }
1199 
1200 /*
1201  * Remove an entry from fdb.
1202  */
1203 static void
1204 vnet_fdbe_del(vnet_t *vnetp, vnet_res_t *vresp)
1205 {
1206 	uint64_t	addr = 0;
1207 	int		rv;
1208 	uint32_t	refcnt;
1209 	vnet_res_t	*tmp;
1210 
1211 	KEY_HASH(addr, vresp->rem_macaddr);
1212 
1213 	/*
1214 	 * Remove the entry from fdb hash table.
1215 	 * This prevents further references to this fdb entry.
1216 	 */
1217 	if (vresp->type != VIO_NET_RES_HYBRID) {
1218 		rv = mod_hash_remove(vnetp->fdb_hashp, (mod_hash_key_t)addr,
1219 		    (mod_hash_val_t *)&tmp);
1220 		if (rv != 0) {
1221 			/*
1222 			 * As the resources are added to the hash only
1223 			 * after they are started, this can occur if
1224 			 * a resource unregisters before it is ever started.
1225 			 */
1226 			return;
1227 		}
1228 	}
1229 
1230 	if (vresp->type == VIO_NET_RES_LDC_SERVICE) {
1231 		WRITE_ENTER(&vnetp->vsw_fp_rw);
1232 
1233 		ASSERT(tmp == vnetp->vsw_fp);
1234 		vnetp->vsw_fp = NULL;
1235 
1236 		RW_EXIT(&vnetp->vsw_fp_rw);
1237 	} else if (vresp->type == VIO_NET_RES_HYBRID) {
1238 		WRITE_ENTER(&vnetp->vsw_fp_rw);
1239 
1240 		vnetp->hio_fp = NULL;
1241 
1242 		RW_EXIT(&vnetp->vsw_fp_rw);
1243 	}
1244 
1245 	/*
1246 	 * If there are threads already ref holding before the entry was
1247 	 * removed from hash table, then wait for ref count to drop to zero.
1248 	 */
1249 	(vresp->type == VIO_NET_RES_LDC_SERVICE) ?
1250 	    (refcnt = 1) : (refcnt = 0);
1251 	while (vresp->refcnt > refcnt) {
1252 		delay(drv_usectohz(vnet_fdbe_refcnt_delay));
1253 	}
1254 }
1255 
1256 /*
1257  * Search fdb for a given mac address. If an entry is found, hold
1258  * a reference to it and return the entry; else returns NULL.
1259  */
1260 static vnet_res_t *
1261 vnet_fdbe_find(vnet_t *vnetp, struct ether_addr *addrp)
1262 {
1263 	uint64_t	key = 0;
1264 	vnet_res_t	*vresp;
1265 	int		rv;
1266 
1267 	KEY_HASH(key, addrp->ether_addr_octet);
1268 
1269 	rv = mod_hash_find_cb(vnetp->fdb_hashp, (mod_hash_key_t)key,
1270 	    (mod_hash_val_t *)&vresp, vnet_fdbe_find_cb);
1271 
1272 	if (rv != 0)
1273 		return (NULL);
1274 
1275 	return (vresp);
1276 }
1277 
1278 /*
1279  * Callback function provided to mod_hash_find_cb(). After finding the fdb
1280  * entry corresponding to the key (macaddr), this callback will be invoked by
1281  * mod_hash_find_cb() to atomically increment the reference count on the fdb
1282  * entry before returning the found entry.
1283  */
1284 static void
1285 vnet_fdbe_find_cb(mod_hash_key_t key, mod_hash_val_t val)
1286 {
1287 	_NOTE(ARGUNUSED(key))
1288 	VNET_FDBE_REFHOLD((vnet_res_t *)val);
1289 }
1290 
1291 /*
1292  * Frames received that are tagged with the pvid of the vnet device must be
1293  * untagged before sending up the stack. This function walks the chain of rx
1294  * frames, untags any such frames and returns the updated chain.
1295  *
1296  * Arguments:
1297  *    pvid:  pvid of the vnet device for which packets are being received
1298  *    mp:    head of pkt chain to be validated and untagged
1299  *
1300  * Returns:
1301  *    mp:    head of updated chain of packets
1302  */
1303 static void
1304 vnet_rx_frames_untag(uint16_t pvid, mblk_t **mp)
1305 {
1306 	struct ether_vlan_header	*evhp;
1307 	mblk_t				*bp;
1308 	mblk_t				*bpt;
1309 	mblk_t				*bph;
1310 	mblk_t				*bpn;
1311 
1312 	bpn = bph = bpt = NULL;
1313 
1314 	for (bp = *mp; bp != NULL; bp = bpn) {
1315 
1316 		bpn = bp->b_next;
1317 		bp->b_next = bp->b_prev = NULL;
1318 
1319 		evhp = (struct ether_vlan_header *)bp->b_rptr;
1320 
1321 		if (ntohs(evhp->ether_tpid) == ETHERTYPE_VLAN &&
1322 		    VLAN_ID(ntohs(evhp->ether_tci)) == pvid) {
1323 
1324 			bp = vnet_vlan_remove_tag(bp);
1325 			if (bp == NULL) {
1326 				continue;
1327 			}
1328 
1329 		}
1330 
1331 		/* build a chain of processed packets */
1332 		if (bph == NULL) {
1333 			bph = bpt = bp;
1334 		} else {
1335 			bpt->b_next = bp;
1336 			bpt = bp;
1337 		}
1338 
1339 	}
1340 
1341 	*mp = bph;
1342 }
1343 
1344 static void
1345 vnet_rx(vio_net_handle_t vrh, mblk_t *mp)
1346 {
1347 	vnet_res_t		*vresp = (vnet_res_t *)vrh;
1348 	vnet_t			*vnetp = vresp->vnetp;
1349 	vnet_pseudo_rx_ring_t	*ringp;
1350 
1351 	if ((vnetp == NULL) || (vnetp->mh == 0)) {
1352 		freemsgchain(mp);
1353 		return;
1354 	}
1355 
1356 	ringp = vresp->rx_ringp;
1357 	mac_rx_ring(vnetp->mh, ringp->handle, mp, ringp->gen_num);
1358 }
1359 
1360 void
1361 vnet_tx_update(vio_net_handle_t vrh)
1362 {
1363 	vnet_res_t		*vresp = (vnet_res_t *)vrh;
1364 	vnet_t			*vnetp = vresp->vnetp;
1365 	vnet_pseudo_tx_ring_t	*tx_ringp;
1366 	vnet_pseudo_tx_group_t	*tx_grp;
1367 	int			i;
1368 
1369 	if (vnetp == NULL || vnetp->mh == NULL) {
1370 		return;
1371 	}
1372 
1373 	/*
1374 	 * Currently, the tx hwring API (used to access rings that belong to
1375 	 * a Hybrid IO resource) does not provide us a per ring flow ctrl
1376 	 * update; also the pseudo rings are shared by the ports/ldcs in the
1377 	 * vgen layer. Thus we can't figure out which pseudo ring is being
1378 	 * re-enabled for transmits. To work around this, when we get a tx
1379 	 * restart notification from below, we simply propagate that to all
1380 	 * the tx pseudo rings registered with the mac layer above.
1381 	 *
1382 	 * There are a couple of side effects with this approach, but they are
1383 	 * not harmful, as outlined below:
1384 	 *
1385 	 * A) We might send an invalid ring_update() for a ring that is not
1386 	 * really flow controlled. This will not have any effect in the mac
1387 	 * layer and packets will continue to be transmitted on that ring.
1388 	 *
1389 	 * B) We might end up clearing the flow control in the mac layer for
1390 	 * a ring that is still flow controlled in the underlying resource.
1391 	 * This will result in the mac layer restarting	transmit, only to be
1392 	 * flow controlled again on that ring.
1393 	 */
1394 	tx_grp = &vnetp->tx_grp[0];
1395 	for (i = 0; i < tx_grp->ring_cnt; i++) {
1396 		tx_ringp = &tx_grp->rings[i];
1397 		mac_tx_ring_update(vnetp->mh, tx_ringp->handle);
1398 	}
1399 }
1400 
1401 /*
1402  * Update the new mtu of vnet into the mac layer. First check if the device has
1403  * been plumbed and if so fail the mtu update. Returns 0 on success.
1404  */
1405 int
1406 vnet_mtu_update(vnet_t *vnetp, uint32_t mtu)
1407 {
1408 	int	rv;
1409 
1410 	if (vnetp == NULL || vnetp->mh == NULL) {
1411 		return (EINVAL);
1412 	}
1413 
1414 	WRITE_ENTER(&vnetp->vrwlock);
1415 
1416 	if (vnetp->flags & VNET_STARTED) {
1417 		RW_EXIT(&vnetp->vrwlock);
1418 		cmn_err(CE_NOTE, "!vnet%d: Unable to process mtu "
1419 		    "update as the device is plumbed\n",
1420 		    vnetp->instance);
1421 		return (EBUSY);
1422 	}
1423 
1424 	/* update mtu in the mac layer */
1425 	rv = mac_maxsdu_update(vnetp->mh, mtu);
1426 	if (rv != 0) {
1427 		RW_EXIT(&vnetp->vrwlock);
1428 		cmn_err(CE_NOTE,
1429 		    "!vnet%d: Unable to update mtu with mac layer\n",
1430 		    vnetp->instance);
1431 		return (EIO);
1432 	}
1433 
1434 	vnetp->mtu = mtu;
1435 
1436 	RW_EXIT(&vnetp->vrwlock);
1437 
1438 	return (0);
1439 }
1440 
1441 /*
1442  * Update the link state of vnet to the mac layer.
1443  */
1444 void
1445 vnet_link_update(vnet_t *vnetp, link_state_t link_state)
1446 {
1447 	if (vnetp == NULL || vnetp->mh == NULL) {
1448 		return;
1449 	}
1450 
1451 	WRITE_ENTER(&vnetp->vrwlock);
1452 	if (vnetp->link_state == link_state) {
1453 		RW_EXIT(&vnetp->vrwlock);
1454 		return;
1455 	}
1456 	vnetp->link_state = link_state;
1457 	RW_EXIT(&vnetp->vrwlock);
1458 
1459 	mac_link_update(vnetp->mh, link_state);
1460 }
1461 
1462 /*
1463  * vio_net_resource_reg -- An interface called to register a resource
1464  *	with vnet.
1465  *	macp -- a GLDv3 mac_register that has all the details of
1466  *		a resource and its callbacks etc.
1467  *	type -- resource type.
1468  *	local_macaddr -- resource's MAC address. This is used to
1469  *			 associate a resource with a corresponding vnet.
1470  *	remote_macaddr -- remote side MAC address. This is ignored for
1471  *			  the Hybrid resources.
1472  *	vhp -- A handle returned to the caller.
1473  *	vcb -- A set of callbacks provided to the callers.
1474  */
1475 int vio_net_resource_reg(mac_register_t *macp, vio_net_res_type_t type,
1476     ether_addr_t local_macaddr, ether_addr_t rem_macaddr, vio_net_handle_t *vhp,
1477     vio_net_callbacks_t *vcb)
1478 {
1479 	vnet_t		*vnetp;
1480 	vnet_res_t	*vresp;
1481 
1482 	vresp = kmem_zalloc(sizeof (vnet_res_t), KM_SLEEP);
1483 	ether_copy(local_macaddr, vresp->local_macaddr);
1484 	ether_copy(rem_macaddr, vresp->rem_macaddr);
1485 	vresp->type = type;
1486 	bcopy(macp, &vresp->macreg, sizeof (mac_register_t));
1487 
1488 	DBG1(NULL, "Resource Registerig type=0%X\n", type);
1489 
1490 	READ_ENTER(&vnet_rw);
1491 	vnetp = vnet_headp;
1492 	while (vnetp != NULL) {
1493 		if (VNET_MATCH_RES(vresp, vnetp)) {
1494 			vresp->vnetp = vnetp;
1495 
1496 			/* Setup kstats for hio resource */
1497 			if (vresp->type == VIO_NET_RES_HYBRID) {
1498 				vresp->ksp = vnet_hio_setup_kstats(DRV_NAME,
1499 				    "hio", vresp);
1500 				if (vresp->ksp == NULL) {
1501 					cmn_err(CE_NOTE, "!vnet%d: Cannot "
1502 					    "create kstats for hio resource",
1503 					    vnetp->instance);
1504 				}
1505 			}
1506 			vnet_add_resource(vnetp, vresp);
1507 			break;
1508 		}
1509 		vnetp = vnetp->nextp;
1510 	}
1511 	RW_EXIT(&vnet_rw);
1512 	if (vresp->vnetp == NULL) {
1513 		DWARN(NULL, "No vnet instance");
1514 		kmem_free(vresp, sizeof (vnet_res_t));
1515 		return (ENXIO);
1516 	}
1517 
1518 	*vhp = vresp;
1519 	vcb->vio_net_rx_cb = vnet_rx;
1520 	vcb->vio_net_tx_update = vnet_tx_update;
1521 	vcb->vio_net_report_err = vnet_handle_res_err;
1522 
1523 	/* Bind the resource to pseudo ring(s) */
1524 	if (vnet_bind_rings(vresp) != 0) {
1525 		(void) vnet_rem_resource(vnetp, vresp);
1526 		vnet_hio_destroy_kstats(vresp->ksp);
1527 		KMEM_FREE(vresp);
1528 		return (1);
1529 	}
1530 
1531 	/* Dispatch a task to start resources */
1532 	vnet_dispatch_res_task(vnetp);
1533 	return (0);
1534 }
1535 
1536 /*
1537  * vio_net_resource_unreg -- An interface to unregister a resource.
1538  */
1539 void
1540 vio_net_resource_unreg(vio_net_handle_t vhp)
1541 {
1542 	vnet_res_t	*vresp = (vnet_res_t *)vhp;
1543 	vnet_t		*vnetp = vresp->vnetp;
1544 
1545 	DBG1(NULL, "Resource Registerig hdl=0x%p", vhp);
1546 
1547 	ASSERT(vnetp != NULL);
1548 	/*
1549 	 * Remove the resource from fdb; this ensures
1550 	 * there are no references to the resource.
1551 	 */
1552 	vnet_fdbe_del(vnetp, vresp);
1553 
1554 	vnet_unbind_rings(vresp);
1555 
1556 	/* Now remove the resource from the list */
1557 	(void) vnet_rem_resource(vnetp, vresp);
1558 
1559 	vnet_hio_destroy_kstats(vresp->ksp);
1560 	KMEM_FREE(vresp);
1561 }
1562 
1563 static void
1564 vnet_add_resource(vnet_t *vnetp, vnet_res_t *vresp)
1565 {
1566 	WRITE_ENTER(&vnetp->vrwlock);
1567 	vresp->nextp = vnetp->vres_list;
1568 	vnetp->vres_list = vresp;
1569 	RW_EXIT(&vnetp->vrwlock);
1570 }
1571 
1572 static vnet_res_t *
1573 vnet_rem_resource(vnet_t *vnetp, vnet_res_t *vresp)
1574 {
1575 	vnet_res_t	*vrp;
1576 
1577 	WRITE_ENTER(&vnetp->vrwlock);
1578 	if (vresp == vnetp->vres_list) {
1579 		vnetp->vres_list = vresp->nextp;
1580 	} else {
1581 		vrp = vnetp->vres_list;
1582 		while (vrp->nextp != NULL) {
1583 			if (vrp->nextp == vresp) {
1584 				vrp->nextp = vresp->nextp;
1585 				break;
1586 			}
1587 			vrp = vrp->nextp;
1588 		}
1589 	}
1590 	vresp->vnetp = NULL;
1591 	vresp->nextp = NULL;
1592 
1593 	RW_EXIT(&vnetp->vrwlock);
1594 
1595 	return (vresp);
1596 }
1597 
1598 /*
1599  * vnet_dds_rx -- an interface called by vgen to DDS messages.
1600  */
1601 void
1602 vnet_dds_rx(void *arg, void *dmsg)
1603 {
1604 	vnet_t *vnetp = arg;
1605 	vdds_process_dds_msg(vnetp, dmsg);
1606 }
1607 
1608 /*
1609  * vnet_send_dds_msg -- An interface provided to DDS to send
1610  *	DDS messages. This simply sends meessages via vgen.
1611  */
1612 int
1613 vnet_send_dds_msg(vnet_t *vnetp, void *dmsg)
1614 {
1615 	int rv;
1616 
1617 	if (vnetp->vgenhdl != NULL) {
1618 		rv = vgen_dds_tx(vnetp->vgenhdl, dmsg);
1619 	}
1620 	return (rv);
1621 }
1622 
1623 /*
1624  * vnet_cleanup_hio -- an interface called by vgen to cleanup hio resources.
1625  */
1626 void
1627 vnet_dds_cleanup_hio(vnet_t *vnetp)
1628 {
1629 	vdds_cleanup_hio(vnetp);
1630 }
1631 
1632 /*
1633  * vnet_handle_res_err -- A callback function called by a resource
1634  *	to report an error. For example, vgen can call to report
1635  *	an LDC down/reset event. This will trigger cleanup of associated
1636  *	Hybrid resource.
1637  */
1638 /* ARGSUSED */
1639 static void
1640 vnet_handle_res_err(vio_net_handle_t vrh, vio_net_err_val_t err)
1641 {
1642 	vnet_res_t *vresp = (vnet_res_t *)vrh;
1643 	vnet_t *vnetp = vresp->vnetp;
1644 
1645 	if (vnetp == NULL) {
1646 		return;
1647 	}
1648 	if ((vresp->type != VIO_NET_RES_LDC_SERVICE) &&
1649 	    (vresp->type != VIO_NET_RES_HYBRID)) {
1650 		return;
1651 	}
1652 
1653 	vdds_cleanup_hio(vnetp);
1654 }
1655 
1656 /*
1657  * vnet_dispatch_res_task -- A function to dispatch tasks start resources.
1658  */
1659 static void
1660 vnet_dispatch_res_task(vnet_t *vnetp)
1661 {
1662 	int rv;
1663 
1664 	/*
1665 	 * Dispatch the task. It could be the case that vnetp->flags does
1666 	 * not have VNET_STARTED set. This is ok as vnet_rest_start_task()
1667 	 * can abort the task when the task is started. See related comments
1668 	 * in vnet_m_stop() and vnet_stop_resources().
1669 	 */
1670 	rv = ddi_taskq_dispatch(vnetp->taskqp, vnet_res_start_task,
1671 	    vnetp, DDI_NOSLEEP);
1672 	if (rv != DDI_SUCCESS) {
1673 		cmn_err(CE_WARN,
1674 		    "vnet%d:Can't dispatch start resource task",
1675 		    vnetp->instance);
1676 	}
1677 }
1678 
1679 /*
1680  * vnet_res_start_task -- A taskq callback function that starts a resource.
1681  */
1682 static void
1683 vnet_res_start_task(void *arg)
1684 {
1685 	vnet_t *vnetp = arg;
1686 
1687 	WRITE_ENTER(&vnetp->vrwlock);
1688 	if (vnetp->flags & VNET_STARTED) {
1689 		vnet_start_resources(vnetp);
1690 	}
1691 	RW_EXIT(&vnetp->vrwlock);
1692 }
1693 
1694 /*
1695  * vnet_start_resources -- starts all resources associated with
1696  *	a vnet.
1697  */
1698 static void
1699 vnet_start_resources(vnet_t *vnetp)
1700 {
1701 	mac_register_t	*macp;
1702 	mac_callbacks_t	*cbp;
1703 	vnet_res_t	*vresp;
1704 	int rv;
1705 
1706 	DBG1(vnetp, "enter\n");
1707 
1708 	ASSERT(RW_WRITE_HELD(&vnetp->vrwlock));
1709 
1710 	for (vresp = vnetp->vres_list; vresp != NULL; vresp = vresp->nextp) {
1711 		/* skip if it is already started */
1712 		if (vresp->flags & VNET_STARTED) {
1713 			continue;
1714 		}
1715 		macp = &vresp->macreg;
1716 		cbp = macp->m_callbacks;
1717 		rv = cbp->mc_start(macp->m_driver);
1718 		if (rv == 0) {
1719 			/*
1720 			 * Successfully started the resource, so now
1721 			 * add it to the fdb.
1722 			 */
1723 			vresp->flags |= VNET_STARTED;
1724 			vnet_fdbe_add(vnetp, vresp);
1725 		}
1726 	}
1727 
1728 	DBG1(vnetp, "exit\n");
1729 
1730 }
1731 
1732 /*
1733  * vnet_stop_resources -- stop all resources associated with a vnet.
1734  */
1735 static void
1736 vnet_stop_resources(vnet_t *vnetp)
1737 {
1738 	vnet_res_t	*vresp;
1739 	mac_register_t	*macp;
1740 	mac_callbacks_t	*cbp;
1741 
1742 	DBG1(vnetp, "enter\n");
1743 
1744 	ASSERT(RW_WRITE_HELD(&vnetp->vrwlock));
1745 
1746 	for (vresp = vnetp->vres_list; vresp != NULL; ) {
1747 		if (vresp->flags & VNET_STARTED) {
1748 			/*
1749 			 * Release the lock while invoking mc_stop() of the
1750 			 * underlying resource. We hold a reference to this
1751 			 * resource to prevent being removed from the list in
1752 			 * vio_net_resource_unreg(). Note that new resources
1753 			 * can be added to the head of the list while the lock
1754 			 * is released, but they won't be started, as
1755 			 * VNET_STARTED flag has been cleared for the vnet
1756 			 * device in vnet_m_stop(). Also, while the lock is
1757 			 * released a resource could be removed from the list
1758 			 * in vio_net_resource_unreg(); but that is ok, as we
1759 			 * re-acquire the lock and only then access the forward
1760 			 * link (vresp->nextp) to continue with the next
1761 			 * resource.
1762 			 */
1763 			vresp->flags &= ~VNET_STARTED;
1764 			vresp->flags |= VNET_STOPPING;
1765 			macp = &vresp->macreg;
1766 			cbp = macp->m_callbacks;
1767 			VNET_FDBE_REFHOLD(vresp);
1768 			RW_EXIT(&vnetp->vrwlock);
1769 
1770 			cbp->mc_stop(macp->m_driver);
1771 
1772 			WRITE_ENTER(&vnetp->vrwlock);
1773 			vresp->flags &= ~VNET_STOPPING;
1774 			VNET_FDBE_REFRELE(vresp);
1775 		}
1776 		vresp = vresp->nextp;
1777 	}
1778 	DBG1(vnetp, "exit\n");
1779 }
1780 
1781 /*
1782  * Setup kstats for the HIO statistics.
1783  * NOTE: the synchronization for the statistics is the
1784  * responsibility of the caller.
1785  */
1786 kstat_t *
1787 vnet_hio_setup_kstats(char *ks_mod, char *ks_name, vnet_res_t *vresp)
1788 {
1789 	kstat_t *ksp;
1790 	vnet_t *vnetp = vresp->vnetp;
1791 	vnet_hio_kstats_t *hiokp;
1792 	size_t size;
1793 
1794 	ASSERT(vnetp != NULL);
1795 	size = sizeof (vnet_hio_kstats_t) / sizeof (kstat_named_t);
1796 	ksp = kstat_create(ks_mod, vnetp->instance, ks_name, "net",
1797 	    KSTAT_TYPE_NAMED, size, 0);
1798 	if (ksp == NULL) {
1799 		return (NULL);
1800 	}
1801 
1802 	hiokp = (vnet_hio_kstats_t *)ksp->ks_data;
1803 	kstat_named_init(&hiokp->ipackets,		"ipackets",
1804 	    KSTAT_DATA_ULONG);
1805 	kstat_named_init(&hiokp->ierrors,		"ierrors",
1806 	    KSTAT_DATA_ULONG);
1807 	kstat_named_init(&hiokp->opackets,		"opackets",
1808 	    KSTAT_DATA_ULONG);
1809 	kstat_named_init(&hiokp->oerrors,		"oerrors",
1810 	    KSTAT_DATA_ULONG);
1811 
1812 
1813 	/* MIB II kstat variables */
1814 	kstat_named_init(&hiokp->rbytes,		"rbytes",
1815 	    KSTAT_DATA_ULONG);
1816 	kstat_named_init(&hiokp->obytes,		"obytes",
1817 	    KSTAT_DATA_ULONG);
1818 	kstat_named_init(&hiokp->multircv,		"multircv",
1819 	    KSTAT_DATA_ULONG);
1820 	kstat_named_init(&hiokp->multixmt,		"multixmt",
1821 	    KSTAT_DATA_ULONG);
1822 	kstat_named_init(&hiokp->brdcstrcv,		"brdcstrcv",
1823 	    KSTAT_DATA_ULONG);
1824 	kstat_named_init(&hiokp->brdcstxmt,		"brdcstxmt",
1825 	    KSTAT_DATA_ULONG);
1826 	kstat_named_init(&hiokp->norcvbuf,		"norcvbuf",
1827 	    KSTAT_DATA_ULONG);
1828 	kstat_named_init(&hiokp->noxmtbuf,		"noxmtbuf",
1829 	    KSTAT_DATA_ULONG);
1830 
1831 	ksp->ks_update = vnet_hio_update_kstats;
1832 	ksp->ks_private = (void *)vresp;
1833 	kstat_install(ksp);
1834 	return (ksp);
1835 }
1836 
1837 /*
1838  * Destroy kstats.
1839  */
1840 static void
1841 vnet_hio_destroy_kstats(kstat_t *ksp)
1842 {
1843 	if (ksp != NULL)
1844 		kstat_delete(ksp);
1845 }
1846 
1847 /*
1848  * Update the kstats.
1849  */
1850 static int
1851 vnet_hio_update_kstats(kstat_t *ksp, int rw)
1852 {
1853 	vnet_t *vnetp;
1854 	vnet_res_t *vresp;
1855 	vnet_hio_stats_t statsp;
1856 	vnet_hio_kstats_t *hiokp;
1857 
1858 	vresp = (vnet_res_t *)ksp->ks_private;
1859 	vnetp = vresp->vnetp;
1860 
1861 	bzero(&statsp, sizeof (vnet_hio_stats_t));
1862 
1863 	READ_ENTER(&vnetp->vsw_fp_rw);
1864 	if (vnetp->hio_fp == NULL) {
1865 		/* not using hio resources, just return */
1866 		RW_EXIT(&vnetp->vsw_fp_rw);
1867 		return (0);
1868 	}
1869 	VNET_FDBE_REFHOLD(vnetp->hio_fp);
1870 	RW_EXIT(&vnetp->vsw_fp_rw);
1871 	vnet_hio_get_stats(vnetp->hio_fp, &statsp);
1872 	VNET_FDBE_REFRELE(vnetp->hio_fp);
1873 
1874 	hiokp = (vnet_hio_kstats_t *)ksp->ks_data;
1875 
1876 	if (rw == KSTAT_READ) {
1877 		/* Link Input/Output stats */
1878 		hiokp->ipackets.value.ul	= (uint32_t)statsp.ipackets;
1879 		hiokp->ipackets64.value.ull	= statsp.ipackets;
1880 		hiokp->ierrors.value.ul		= statsp.ierrors;
1881 		hiokp->opackets.value.ul	= (uint32_t)statsp.opackets;
1882 		hiokp->opackets64.value.ull	= statsp.opackets;
1883 		hiokp->oerrors.value.ul		= statsp.oerrors;
1884 
1885 		/* MIB II kstat variables */
1886 		hiokp->rbytes.value.ul		= (uint32_t)statsp.rbytes;
1887 		hiokp->rbytes64.value.ull	= statsp.rbytes;
1888 		hiokp->obytes.value.ul		= (uint32_t)statsp.obytes;
1889 		hiokp->obytes64.value.ull	= statsp.obytes;
1890 		hiokp->multircv.value.ul	= statsp.multircv;
1891 		hiokp->multixmt.value.ul	= statsp.multixmt;
1892 		hiokp->brdcstrcv.value.ul	= statsp.brdcstrcv;
1893 		hiokp->brdcstxmt.value.ul	= statsp.brdcstxmt;
1894 		hiokp->norcvbuf.value.ul	= statsp.norcvbuf;
1895 		hiokp->noxmtbuf.value.ul	= statsp.noxmtbuf;
1896 	} else {
1897 		return (EACCES);
1898 	}
1899 
1900 	return (0);
1901 }
1902 
1903 static void
1904 vnet_hio_get_stats(vnet_res_t *vresp, vnet_hio_stats_t *statsp)
1905 {
1906 	mac_register_t		*macp;
1907 	mac_callbacks_t		*cbp;
1908 	uint64_t		val;
1909 	int			stat;
1910 
1911 	/*
1912 	 * get the specified statistics from the underlying nxge.
1913 	 */
1914 	macp = &vresp->macreg;
1915 	cbp = macp->m_callbacks;
1916 	for (stat = MAC_STAT_MIN; stat < MAC_STAT_OVERFLOWS; stat++) {
1917 		if (cbp->mc_getstat(macp->m_driver, stat, &val) == 0) {
1918 			switch (stat) {
1919 			case MAC_STAT_IPACKETS:
1920 				statsp->ipackets = val;
1921 				break;
1922 
1923 			case MAC_STAT_IERRORS:
1924 				statsp->ierrors = val;
1925 				break;
1926 
1927 			case MAC_STAT_OPACKETS:
1928 				statsp->opackets = val;
1929 				break;
1930 
1931 			case MAC_STAT_OERRORS:
1932 				statsp->oerrors = val;
1933 				break;
1934 
1935 			case MAC_STAT_RBYTES:
1936 				statsp->rbytes = val;
1937 				break;
1938 
1939 			case MAC_STAT_OBYTES:
1940 				statsp->obytes = val;
1941 				break;
1942 
1943 			case MAC_STAT_MULTIRCV:
1944 				statsp->multircv = val;
1945 				break;
1946 
1947 			case MAC_STAT_MULTIXMT:
1948 				statsp->multixmt = val;
1949 				break;
1950 
1951 			case MAC_STAT_BRDCSTRCV:
1952 				statsp->brdcstrcv = val;
1953 				break;
1954 
1955 			case MAC_STAT_BRDCSTXMT:
1956 				statsp->brdcstxmt = val;
1957 				break;
1958 
1959 			case MAC_STAT_NOXMTBUF:
1960 				statsp->noxmtbuf = val;
1961 				break;
1962 
1963 			case MAC_STAT_NORCVBUF:
1964 				statsp->norcvbuf = val;
1965 				break;
1966 
1967 			default:
1968 				/*
1969 				 * parameters not interested.
1970 				 */
1971 				break;
1972 			}
1973 		}
1974 	}
1975 }
1976 
1977 static boolean_t
1978 vnet_m_capab(void *arg, mac_capab_t cap, void *cap_data)
1979 {
1980 	vnet_t	*vnetp = (vnet_t *)arg;
1981 
1982 	if (vnetp == NULL) {
1983 		return (0);
1984 	}
1985 
1986 	switch (cap) {
1987 
1988 	case MAC_CAPAB_RINGS: {
1989 
1990 		mac_capab_rings_t *cap_rings = cap_data;
1991 		/*
1992 		 * Rings Capability Notes:
1993 		 * We advertise rings to make use of the rings framework in
1994 		 * gldv3 mac layer, to improve the performance. This is
1995 		 * specifically needed when a Hybrid resource (with multiple
1996 		 * tx/rx hardware rings) is assigned to a vnet device. We also
1997 		 * leverage this for the normal case when no Hybrid resource is
1998 		 * assigned.
1999 		 *
2000 		 * Ring Allocation:
2001 		 * - TX path:
2002 		 * We expose a pseudo ring group with 2 pseudo tx rings (as
2003 		 * currently HybridIO exports only 2 rings) In the normal case,
2004 		 * transmit traffic that comes down to the driver through the
2005 		 * mri_tx (vnet_tx_ring_send()) entry point goes through the
2006 		 * distributed switching algorithm in vnet and gets transmitted
2007 		 * over a port/LDC in the vgen layer to either the vswitch or a
2008 		 * peer vnet. If and when a Hybrid resource is assigned to the
2009 		 * vnet, we obtain the tx ring information of the Hybrid device
2010 		 * (nxge) and map the pseudo rings 1:1 to the 2 hw tx rings.
2011 		 * Traffic being sent over the Hybrid resource by the mac layer
2012 		 * gets spread across both hw rings, as they are mapped to the
2013 		 * 2 pseudo tx rings in vnet.
2014 		 *
2015 		 * - RX path:
2016 		 * We expose a pseudo ring group with 3 pseudo rx rings (static
2017 		 * rings) initially. The first (default) pseudo rx ring is
2018 		 * reserved for the resource that connects to the vswitch
2019 		 * service. The next 2 rings are reserved for a Hybrid resource
2020 		 * that may be assigned to the vnet device. If and when a
2021 		 * Hybrid resource is assigned to the vnet, we obtain the rx
2022 		 * ring information of the Hybrid device (nxge) and map these
2023 		 * pseudo rings 1:1 to the 2 hw rx rings. For each additional
2024 		 * resource that connects to a peer vnet, we dynamically
2025 		 * allocate a pseudo rx ring and map it to that resource, when
2026 		 * the resource gets added; and the pseudo rx ring is
2027 		 * dynamically registered with the upper mac layer. We do the
2028 		 * reverse and unregister the ring with the mac layer when
2029 		 * the resource gets removed.
2030 		 *
2031 		 * Synchronization notes:
2032 		 * We don't need any lock to protect members of ring structure,
2033 		 * specifically ringp->hw_rh, in either the TX or the RX ring,
2034 		 * as explained below.
2035 		 * - TX ring:
2036 		 * ring->hw_rh is initialized only when a Hybrid resource is
2037 		 * associated; and gets referenced only in vnet_hio_tx(). The
2038 		 * Hybrid resource itself is available in fdb only after tx
2039 		 * hwrings are found and mapped; i.e, in vio_net_resource_reg()
2040 		 * we call vnet_bind_rings() first and then call
2041 		 * vnet_start_resources() which adds an entry to fdb. For
2042 		 * traffic going over LDC resources, we don't reference
2043 		 * ring->hw_rh at all.
2044 		 * - RX ring:
2045 		 * For rings mapped to Hybrid resource ring->hw_rh is
2046 		 * initialized and only then do we add the rx callback for
2047 		 * the underlying Hybrid resource; we disable callbacks before
2048 		 * we unmap ring->hw_rh. For rings mapped to LDC resources, we
2049 		 * stop the rx callbacks (in vgen) before we remove ring->hw_rh
2050 		 * (vio_net_resource_unreg()).
2051 		 */
2052 
2053 		if (cap_rings->mr_type == MAC_RING_TYPE_RX) {
2054 			cap_rings->mr_group_type = MAC_GROUP_TYPE_STATIC;
2055 
2056 			/*
2057 			 * The ring_cnt for rx grp is initialized in
2058 			 * vnet_ring_grp_init(). Later, the ring_cnt gets
2059 			 * updated dynamically whenever LDC resources are added
2060 			 * or removed.
2061 			 */
2062 			cap_rings->mr_rnum = vnetp->rx_grp[0].ring_cnt;
2063 			cap_rings->mr_rget = vnet_get_ring;
2064 
2065 			cap_rings->mr_gnum = VNET_NUM_PSEUDO_GROUPS;
2066 			cap_rings->mr_gget = vnet_get_group;
2067 			cap_rings->mr_gaddring = NULL;
2068 			cap_rings->mr_gremring = NULL;
2069 		} else {
2070 			cap_rings->mr_group_type = MAC_GROUP_TYPE_STATIC;
2071 
2072 			/*
2073 			 * The ring_cnt for tx grp is initialized in
2074 			 * vnet_ring_grp_init() and remains constant, as we
2075 			 * do not support dymanic tx rings for now.
2076 			 */
2077 			cap_rings->mr_rnum = vnetp->tx_grp[0].ring_cnt;
2078 			cap_rings->mr_rget = vnet_get_ring;
2079 
2080 			/*
2081 			 * Transmit rings are not grouped; i.e, the number of
2082 			 * transmit ring groups advertised should be set to 0.
2083 			 */
2084 			cap_rings->mr_gnum = 0;
2085 
2086 			cap_rings->mr_gget = vnet_get_group;
2087 			cap_rings->mr_gaddring = NULL;
2088 			cap_rings->mr_gremring = NULL;
2089 		}
2090 		return (B_TRUE);
2091 
2092 	}
2093 
2094 	default:
2095 		break;
2096 
2097 	}
2098 
2099 	return (B_FALSE);
2100 }
2101 
2102 /*
2103  * Callback funtion for MAC layer to get ring information.
2104  */
2105 static void
2106 vnet_get_ring(void *arg, mac_ring_type_t rtype, const int g_index,
2107     const int r_index, mac_ring_info_t *infop, mac_ring_handle_t r_handle)
2108 {
2109 	vnet_t	*vnetp = arg;
2110 
2111 	switch (rtype) {
2112 
2113 	case MAC_RING_TYPE_RX: {
2114 
2115 		vnet_pseudo_rx_group_t	*rx_grp;
2116 		vnet_pseudo_rx_ring_t	*rx_ringp;
2117 		mac_intr_t		*mintr;
2118 
2119 		/* We advertised only one RX group */
2120 		ASSERT(g_index == 0);
2121 		rx_grp = &vnetp->rx_grp[g_index];
2122 
2123 		/* Check the current # of rings in the rx group */
2124 		ASSERT((r_index >= 0) && (r_index < rx_grp->max_ring_cnt));
2125 
2126 		/* Get the ring based on the index */
2127 		rx_ringp = &rx_grp->rings[r_index];
2128 
2129 		rx_ringp->handle = r_handle;
2130 		/*
2131 		 * Note: we don't need to save the incoming r_index in rx_ring,
2132 		 * as vnet_ring_grp_init() would have initialized the index for
2133 		 * each ring in the array.
2134 		 */
2135 		rx_ringp->grp = rx_grp;
2136 		rx_ringp->vnetp = vnetp;
2137 
2138 		mintr = &infop->mri_intr;
2139 		mintr->mi_handle = (mac_intr_handle_t)rx_ringp;
2140 		mintr->mi_enable = (mac_intr_enable_t)vnet_ring_enable_intr;
2141 		mintr->mi_disable = (mac_intr_disable_t)vnet_ring_disable_intr;
2142 
2143 		infop->mri_driver = (mac_ring_driver_t)rx_ringp;
2144 		infop->mri_start = vnet_rx_ring_start;
2145 		infop->mri_stop = vnet_rx_ring_stop;
2146 
2147 		/* Set the poll function, as this is an rx ring */
2148 		infop->mri_poll = vnet_rx_poll;
2149 
2150 		break;
2151 	}
2152 
2153 	case MAC_RING_TYPE_TX: {
2154 		vnet_pseudo_tx_group_t	*tx_grp;
2155 		vnet_pseudo_tx_ring_t	*tx_ringp;
2156 
2157 		/*
2158 		 * No need to check grp index; mac layer passes -1 for it.
2159 		 */
2160 		tx_grp = &vnetp->tx_grp[0];
2161 
2162 		/* Check the # of rings in the tx group */
2163 		ASSERT((r_index >= 0) && (r_index < tx_grp->ring_cnt));
2164 
2165 		/* Get the ring based on the index */
2166 		tx_ringp = &tx_grp->rings[r_index];
2167 
2168 		tx_ringp->handle = r_handle;
2169 		tx_ringp->index = r_index;
2170 		tx_ringp->grp = tx_grp;
2171 		tx_ringp->vnetp = vnetp;
2172 
2173 		infop->mri_driver = (mac_ring_driver_t)tx_ringp;
2174 		infop->mri_start = vnet_tx_ring_start;
2175 		infop->mri_stop = vnet_tx_ring_stop;
2176 
2177 		/* Set the transmit function, as this is a tx ring */
2178 		infop->mri_tx = vnet_tx_ring_send;
2179 
2180 		break;
2181 	}
2182 
2183 	default:
2184 		break;
2185 	}
2186 }
2187 
2188 /*
2189  * Callback funtion for MAC layer to get group information.
2190  */
2191 static void
2192 vnet_get_group(void *arg, mac_ring_type_t type, const int index,
2193 	mac_group_info_t *infop, mac_group_handle_t handle)
2194 {
2195 	vnet_t	*vnetp = (vnet_t *)arg;
2196 
2197 	switch (type) {
2198 
2199 	case MAC_RING_TYPE_RX:
2200 	{
2201 		vnet_pseudo_rx_group_t	*rx_grp;
2202 
2203 		/* We advertised only one RX group */
2204 		ASSERT(index == 0);
2205 
2206 		rx_grp = &vnetp->rx_grp[index];
2207 		rx_grp->handle = handle;
2208 		rx_grp->index = index;
2209 		rx_grp->vnetp = vnetp;
2210 
2211 		infop->mgi_driver = (mac_group_driver_t)rx_grp;
2212 		infop->mgi_start = NULL;
2213 		infop->mgi_stop = NULL;
2214 		infop->mgi_addmac = vnet_addmac;
2215 		infop->mgi_remmac = vnet_remmac;
2216 		infop->mgi_count = rx_grp->ring_cnt;
2217 
2218 		break;
2219 	}
2220 
2221 	case MAC_RING_TYPE_TX:
2222 	{
2223 		vnet_pseudo_tx_group_t	*tx_grp;
2224 
2225 		/* We advertised only one TX group */
2226 		ASSERT(index == 0);
2227 
2228 		tx_grp = &vnetp->tx_grp[index];
2229 		tx_grp->handle = handle;
2230 		tx_grp->index = index;
2231 		tx_grp->vnetp = vnetp;
2232 
2233 		infop->mgi_driver = (mac_group_driver_t)tx_grp;
2234 		infop->mgi_start = NULL;
2235 		infop->mgi_stop = NULL;
2236 		infop->mgi_addmac = NULL;
2237 		infop->mgi_remmac = NULL;
2238 		infop->mgi_count = VNET_NUM_PSEUDO_TXRINGS;
2239 
2240 		break;
2241 	}
2242 
2243 	default:
2244 		break;
2245 
2246 	}
2247 }
2248 
2249 static int
2250 vnet_rx_ring_start(mac_ring_driver_t arg, uint64_t mr_gen_num)
2251 {
2252 	vnet_pseudo_rx_ring_t	*rx_ringp = (vnet_pseudo_rx_ring_t *)arg;
2253 	int			err;
2254 
2255 	/*
2256 	 * If this ring is mapped to a LDC resource, simply mark the state to
2257 	 * indicate the ring is started and return.
2258 	 */
2259 	if ((rx_ringp->state &
2260 	    (VNET_RXRING_LDC_SERVICE|VNET_RXRING_LDC_GUEST)) != 0) {
2261 		rx_ringp->gen_num = mr_gen_num;
2262 		rx_ringp->state |= VNET_RXRING_STARTED;
2263 		return (0);
2264 	}
2265 
2266 	ASSERT((rx_ringp->state & VNET_RXRING_HYBRID) != 0);
2267 
2268 	/*
2269 	 * This must be a ring reserved for a hwring. If the hwring is not
2270 	 * bound yet, simply mark the state to indicate the ring is started and
2271 	 * return. If and when a hybrid resource is activated for this vnet
2272 	 * device, we will bind the hwring and start it then. If a hwring is
2273 	 * already bound, start it now.
2274 	 */
2275 	if (rx_ringp->hw_rh == NULL) {
2276 		rx_ringp->gen_num = mr_gen_num;
2277 		rx_ringp->state |= VNET_RXRING_STARTED;
2278 		return (0);
2279 	}
2280 
2281 	err = mac_hwring_start(rx_ringp->hw_rh);
2282 	if (err == 0) {
2283 		rx_ringp->gen_num = mr_gen_num;
2284 		rx_ringp->state |= VNET_RXRING_STARTED;
2285 	} else {
2286 		err = ENXIO;
2287 	}
2288 
2289 	return (err);
2290 }
2291 
2292 static void
2293 vnet_rx_ring_stop(mac_ring_driver_t arg)
2294 {
2295 	vnet_pseudo_rx_ring_t	*rx_ringp = (vnet_pseudo_rx_ring_t *)arg;
2296 
2297 	/*
2298 	 * If this ring is mapped to a LDC resource, simply mark the state to
2299 	 * indicate the ring is now stopped and return.
2300 	 */
2301 	if ((rx_ringp->state &
2302 	    (VNET_RXRING_LDC_SERVICE|VNET_RXRING_LDC_GUEST)) != 0) {
2303 		rx_ringp->state &= ~VNET_RXRING_STARTED;
2304 		return;
2305 	}
2306 
2307 	ASSERT((rx_ringp->state & VNET_RXRING_HYBRID) != 0);
2308 
2309 	/*
2310 	 * This must be a ring reserved for a hwring. If the hwring is not
2311 	 * bound yet, simply mark the state to indicate the ring is stopped and
2312 	 * return. If a hwring is already bound, stop it now.
2313 	 */
2314 	if (rx_ringp->hw_rh == NULL) {
2315 		rx_ringp->state &= ~VNET_RXRING_STARTED;
2316 		return;
2317 	}
2318 
2319 	mac_hwring_stop(rx_ringp->hw_rh);
2320 	rx_ringp->state &= ~VNET_RXRING_STARTED;
2321 }
2322 
2323 /* ARGSUSED */
2324 static int
2325 vnet_tx_ring_start(mac_ring_driver_t arg, uint64_t mr_gen_num)
2326 {
2327 	vnet_pseudo_tx_ring_t	*tx_ringp = (vnet_pseudo_tx_ring_t *)arg;
2328 
2329 	tx_ringp->state |= VNET_TXRING_STARTED;
2330 	return (0);
2331 }
2332 
2333 static void
2334 vnet_tx_ring_stop(mac_ring_driver_t arg)
2335 {
2336 	vnet_pseudo_tx_ring_t	*tx_ringp = (vnet_pseudo_tx_ring_t *)arg;
2337 
2338 	tx_ringp->state &= ~VNET_TXRING_STARTED;
2339 }
2340 
2341 /*
2342  * Disable polling for a ring and enable its interrupt.
2343  */
2344 static int
2345 vnet_ring_enable_intr(void *arg)
2346 {
2347 	vnet_pseudo_rx_ring_t	*rx_ringp = (vnet_pseudo_rx_ring_t *)arg;
2348 	vnet_res_t		*vresp;
2349 
2350 	if (rx_ringp->hw_rh == NULL) {
2351 		/*
2352 		 * Ring enable intr func is being invoked, but the ring is
2353 		 * not bound to any underlying resource ? This must be a ring
2354 		 * reserved for Hybrid resource and no such resource has been
2355 		 * assigned to this vnet device yet. We simply return success.
2356 		 */
2357 		ASSERT((rx_ringp->state & VNET_RXRING_HYBRID) != 0);
2358 		return (0);
2359 	}
2360 
2361 	/*
2362 	 * The rx ring has been bound to either a LDC or a Hybrid resource.
2363 	 * Call the appropriate function to enable interrupts for the ring.
2364 	 */
2365 	if (rx_ringp->state & VNET_RXRING_HYBRID) {
2366 		return (mac_hwring_enable_intr(rx_ringp->hw_rh));
2367 	} else {
2368 		vresp = (vnet_res_t *)rx_ringp->hw_rh;
2369 		return (vgen_enable_intr(vresp->macreg.m_driver));
2370 	}
2371 }
2372 
2373 /*
2374  * Enable polling for a ring and disable its interrupt.
2375  */
2376 static int
2377 vnet_ring_disable_intr(void *arg)
2378 {
2379 	vnet_pseudo_rx_ring_t	*rx_ringp = (vnet_pseudo_rx_ring_t *)arg;
2380 	vnet_res_t		*vresp;
2381 
2382 	if (rx_ringp->hw_rh == NULL) {
2383 		/*
2384 		 * Ring disable intr func is being invoked, but the ring is
2385 		 * not bound to any underlying resource ? This must be a ring
2386 		 * reserved for Hybrid resource and no such resource has been
2387 		 * assigned to this vnet device yet. We simply return success.
2388 		 */
2389 		ASSERT((rx_ringp->state & VNET_RXRING_HYBRID) != 0);
2390 		return (0);
2391 	}
2392 
2393 	/*
2394 	 * The rx ring has been bound to either a LDC or a Hybrid resource.
2395 	 * Call the appropriate function to disable interrupts for the ring.
2396 	 */
2397 	if (rx_ringp->state & VNET_RXRING_HYBRID) {
2398 		return (mac_hwring_disable_intr(rx_ringp->hw_rh));
2399 	} else {
2400 		vresp = (vnet_res_t *)rx_ringp->hw_rh;
2401 		return (vgen_disable_intr(vresp->macreg.m_driver));
2402 	}
2403 }
2404 
2405 /*
2406  * Poll 'bytes_to_pickup' bytes of message from the rx ring.
2407  */
2408 static mblk_t *
2409 vnet_rx_poll(void *arg, int bytes_to_pickup)
2410 {
2411 	vnet_pseudo_rx_ring_t	*rx_ringp = (vnet_pseudo_rx_ring_t *)arg;
2412 	mblk_t			*mp = NULL;
2413 	vnet_res_t		*vresp;
2414 	vnet_t			*vnetp = rx_ringp->vnetp;
2415 
2416 	if (rx_ringp->hw_rh == NULL) {
2417 		return (NULL);
2418 	}
2419 
2420 	if (rx_ringp->state & VNET_RXRING_HYBRID) {
2421 		mp = mac_hwring_poll(rx_ringp->hw_rh, bytes_to_pickup);
2422 		/*
2423 		 * Packets received over a hybrid resource need additional
2424 		 * processing to remove the tag, for the pvid case. The
2425 		 * underlying resource is not aware of the vnet's pvid and thus
2426 		 * packets are received with the vlan tag in the header; unlike
2427 		 * packets that are received over a ldc channel in which case
2428 		 * the peer vnet/vsw would have already removed the tag.
2429 		 */
2430 		if (vnetp->pvid != vnetp->default_vlan_id) {
2431 			vnet_rx_frames_untag(vnetp->pvid, &mp);
2432 		}
2433 	} else {
2434 		vresp = (vnet_res_t *)rx_ringp->hw_rh;
2435 		mp = vgen_poll(vresp->macreg.m_driver, bytes_to_pickup);
2436 	}
2437 	return (mp);
2438 }
2439 
2440 /* ARGSUSED */
2441 void
2442 vnet_hio_rx_cb(void *arg, mac_resource_handle_t mrh, mblk_t *mp,
2443 	boolean_t loopback)
2444 {
2445 	vnet_t			*vnetp = (vnet_t *)arg;
2446 	vnet_pseudo_rx_ring_t	*ringp = (vnet_pseudo_rx_ring_t *)mrh;
2447 
2448 	/*
2449 	 * Packets received over a hybrid resource need additional processing
2450 	 * to remove the tag, for the pvid case. The underlying resource is
2451 	 * not aware of the vnet's pvid and thus packets are received with the
2452 	 * vlan tag in the header; unlike packets that are received over a ldc
2453 	 * channel in which case the peer vnet/vsw would have already removed
2454 	 * the tag.
2455 	 */
2456 	if (vnetp->pvid != vnetp->default_vlan_id) {
2457 		vnet_rx_frames_untag(vnetp->pvid, &mp);
2458 		if (mp == NULL) {
2459 			return;
2460 		}
2461 	}
2462 	mac_rx_ring(vnetp->mh, ringp->handle, mp, ringp->gen_num);
2463 }
2464 
2465 static int
2466 vnet_addmac(void *arg, const uint8_t *mac_addr)
2467 {
2468 	vnet_pseudo_rx_group_t  *rx_grp = (vnet_pseudo_rx_group_t *)arg;
2469 	vnet_t			*vnetp;
2470 
2471 	vnetp = rx_grp->vnetp;
2472 
2473 	if (bcmp(mac_addr, vnetp->curr_macaddr, ETHERADDRL) == 0) {
2474 		return (0);
2475 	}
2476 
2477 	cmn_err(CE_CONT, "!vnet%d: %s: Multiple macaddr unsupported\n",
2478 	    vnetp->instance, __func__);
2479 	return (EINVAL);
2480 }
2481 
2482 static int
2483 vnet_remmac(void *arg, const uint8_t *mac_addr)
2484 {
2485 	vnet_pseudo_rx_group_t  *rx_grp = (vnet_pseudo_rx_group_t *)arg;
2486 	vnet_t			*vnetp;
2487 
2488 	vnetp = rx_grp->vnetp;
2489 
2490 	if (bcmp(mac_addr, vnetp->curr_macaddr, ETHERADDRL) == 0) {
2491 		return (0);
2492 	}
2493 
2494 	cmn_err(CE_CONT, "!vnet%d: %s: Invalid macaddr: %s\n",
2495 	    vnetp->instance, __func__, ether_sprintf((void *)mac_addr));
2496 	return (EINVAL);
2497 }
2498 
2499 int
2500 vnet_hio_mac_init(vnet_t *vnetp, char *ifname)
2501 {
2502 	mac_handle_t		mh;
2503 	mac_client_handle_t	mch = NULL;
2504 	mac_unicast_handle_t	muh = NULL;
2505 	mac_diag_t		diag;
2506 	mac_register_t		*macp;
2507 	char			client_name[MAXNAMELEN];
2508 	int			rv;
2509 	uint16_t		mac_flags = MAC_UNICAST_TAG_DISABLE |
2510 	    MAC_UNICAST_STRIP_DISABLE | MAC_UNICAST_PRIMARY;
2511 	vio_net_callbacks_t	vcb;
2512 	ether_addr_t		rem_addr =
2513 		{ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
2514 	uint32_t		retries = 0;
2515 
2516 	if ((macp = mac_alloc(MAC_VERSION)) == NULL) {
2517 		return (EAGAIN);
2518 	}
2519 
2520 	do {
2521 		rv = mac_open_by_linkname(ifname, &mh);
2522 		if (rv == 0) {
2523 			break;
2524 		}
2525 		if (rv != ENOENT || (retries++ >= vnet_mac_open_retries)) {
2526 			mac_free(macp);
2527 			return (rv);
2528 		}
2529 		drv_usecwait(vnet_mac_open_delay);
2530 	} while (rv == ENOENT);
2531 
2532 	vnetp->hio_mh = mh;
2533 
2534 	(void) snprintf(client_name, MAXNAMELEN, "vnet%d-%s", vnetp->instance,
2535 	    ifname);
2536 	rv = mac_client_open(mh, &mch, client_name, MAC_OPEN_FLAGS_EXCLUSIVE);
2537 	if (rv != 0) {
2538 		goto fail;
2539 	}
2540 	vnetp->hio_mch = mch;
2541 
2542 	rv = mac_unicast_add(mch, vnetp->curr_macaddr, mac_flags, &muh, 0,
2543 	    &diag);
2544 	if (rv != 0) {
2545 		goto fail;
2546 	}
2547 	vnetp->hio_muh = muh;
2548 
2549 	macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
2550 	macp->m_driver = vnetp;
2551 	macp->m_dip = NULL;
2552 	macp->m_src_addr = NULL;
2553 	macp->m_callbacks = &vnet_hio_res_callbacks;
2554 	macp->m_min_sdu = 0;
2555 	macp->m_max_sdu = ETHERMTU;
2556 
2557 	rv = vio_net_resource_reg(macp, VIO_NET_RES_HYBRID,
2558 	    vnetp->curr_macaddr, rem_addr, &vnetp->hio_vhp, &vcb);
2559 	if (rv != 0) {
2560 		goto fail;
2561 	}
2562 	mac_free(macp);
2563 
2564 	/* add the recv callback */
2565 	mac_rx_set(vnetp->hio_mch, vnet_hio_rx_cb, vnetp);
2566 
2567 	/* add the notify callback - only tx updates for now */
2568 	vnetp->hio_mnh = mac_notify_add(vnetp->hio_mh, vnet_hio_notify_cb,
2569 	    vnetp);
2570 
2571 	return (0);
2572 
2573 fail:
2574 	mac_free(macp);
2575 	vnet_hio_mac_cleanup(vnetp);
2576 	return (1);
2577 }
2578 
2579 void
2580 vnet_hio_mac_cleanup(vnet_t *vnetp)
2581 {
2582 	if (vnetp->hio_mnh != NULL) {
2583 		(void) mac_notify_remove(vnetp->hio_mnh, B_TRUE);
2584 		vnetp->hio_mnh = NULL;
2585 	}
2586 
2587 	if (vnetp->hio_vhp != NULL) {
2588 		vio_net_resource_unreg(vnetp->hio_vhp);
2589 		vnetp->hio_vhp = NULL;
2590 	}
2591 
2592 	if (vnetp->hio_muh != NULL) {
2593 		(void) mac_unicast_remove(vnetp->hio_mch, vnetp->hio_muh);
2594 		vnetp->hio_muh = NULL;
2595 	}
2596 
2597 	if (vnetp->hio_mch != NULL) {
2598 		mac_client_close(vnetp->hio_mch, 0);
2599 		vnetp->hio_mch = NULL;
2600 	}
2601 
2602 	if (vnetp->hio_mh != NULL) {
2603 		mac_close(vnetp->hio_mh);
2604 		vnetp->hio_mh = NULL;
2605 	}
2606 }
2607 
2608 /* Bind pseudo rings to hwrings */
2609 static int
2610 vnet_bind_hwrings(vnet_t *vnetp)
2611 {
2612 	mac_ring_handle_t	hw_rh[VNET_NUM_HYBRID_RINGS];
2613 	mac_perim_handle_t	mph1;
2614 	vnet_pseudo_rx_group_t	*rx_grp;
2615 	vnet_pseudo_rx_ring_t	*rx_ringp;
2616 	vnet_pseudo_tx_group_t	*tx_grp;
2617 	vnet_pseudo_tx_ring_t	*tx_ringp;
2618 	int			hw_ring_cnt;
2619 	int			i;
2620 	int			rv;
2621 
2622 	mac_perim_enter_by_mh(vnetp->hio_mh, &mph1);
2623 
2624 	/* Get the list of the underlying RX rings. */
2625 	hw_ring_cnt = mac_hwrings_get(vnetp->hio_mch, &vnetp->rx_hwgh, hw_rh,
2626 	    MAC_RING_TYPE_RX);
2627 
2628 	/* We expect the the # of hw rx rings to match VNET_NUM_HYBRID_RINGS */
2629 	if (hw_ring_cnt != VNET_NUM_HYBRID_RINGS) {
2630 		cmn_err(CE_WARN,
2631 		    "!vnet%d: vnet_bind_hwrings: bad rx hw_ring_cnt(%d)\n",
2632 		    vnetp->instance, hw_ring_cnt);
2633 		goto fail;
2634 	}
2635 
2636 	if (vnetp->rx_hwgh != NULL) {
2637 		/*
2638 		 * Quiesce the HW ring and the mac srs on the ring. Note
2639 		 * that the HW ring will be restarted when the pseudo ring
2640 		 * is started. At that time all the packets will be
2641 		 * directly passed up to the pseudo RX ring and handled
2642 		 * by mac srs created over the pseudo RX ring.
2643 		 */
2644 		mac_rx_client_quiesce(vnetp->hio_mch);
2645 		mac_srs_perm_quiesce(vnetp->hio_mch, B_TRUE);
2646 	}
2647 
2648 	/*
2649 	 * Bind the pseudo rings to the hwrings and start the hwrings.
2650 	 * Note we don't need to register these with the upper mac, as we have
2651 	 * statically exported these pseudo rxrings which are reserved for
2652 	 * rxrings of Hybrid resource.
2653 	 */
2654 	rx_grp = &vnetp->rx_grp[0];
2655 	for (i = 0; i < VNET_NUM_HYBRID_RINGS; i++) {
2656 		/* Pick the rxrings reserved for Hybrid resource */
2657 		rx_ringp = &rx_grp->rings[i + VNET_HYBRID_RXRING_INDEX];
2658 
2659 		/* Store the hw ring handle */
2660 		rx_ringp->hw_rh = hw_rh[i];
2661 
2662 		/* Bind the pseudo ring to the underlying hwring */
2663 		mac_hwring_setup(rx_ringp->hw_rh,
2664 		    (mac_resource_handle_t)rx_ringp);
2665 
2666 		/* Start the hwring if needed */
2667 		if (rx_ringp->state & VNET_RXRING_STARTED) {
2668 			rv = mac_hwring_start(rx_ringp->hw_rh);
2669 			if (rv != 0) {
2670 				mac_hwring_teardown(rx_ringp->hw_rh);
2671 				rx_ringp->hw_rh = NULL;
2672 				goto fail;
2673 			}
2674 		}
2675 	}
2676 
2677 	/* Get the list of the underlying TX rings. */
2678 	hw_ring_cnt = mac_hwrings_get(vnetp->hio_mch, &vnetp->tx_hwgh, hw_rh,
2679 	    MAC_RING_TYPE_TX);
2680 
2681 	/* We expect the # of hw tx rings to match VNET_NUM_HYBRID_RINGS */
2682 	if (hw_ring_cnt != VNET_NUM_HYBRID_RINGS) {
2683 		cmn_err(CE_WARN,
2684 		    "!vnet%d: vnet_bind_hwrings: bad tx hw_ring_cnt(%d)\n",
2685 		    vnetp->instance, hw_ring_cnt);
2686 		goto fail;
2687 	}
2688 
2689 	/*
2690 	 * Now map the pseudo txrings to the hw txrings. Note we don't need
2691 	 * to register these with the upper mac, as we have statically exported
2692 	 * these rings. Note that these rings will continue to be used for LDC
2693 	 * resources to peer vnets and vswitch (shared ring).
2694 	 */
2695 	tx_grp = &vnetp->tx_grp[0];
2696 	for (i = 0; i < tx_grp->ring_cnt; i++) {
2697 		tx_ringp = &tx_grp->rings[i];
2698 		tx_ringp->hw_rh = hw_rh[i];
2699 		tx_ringp->state |= VNET_TXRING_HYBRID;
2700 	}
2701 
2702 	mac_perim_exit(mph1);
2703 	return (0);
2704 
2705 fail:
2706 	mac_perim_exit(mph1);
2707 	vnet_unbind_hwrings(vnetp);
2708 	return (1);
2709 }
2710 
2711 /* Unbind pseudo rings from hwrings */
2712 static void
2713 vnet_unbind_hwrings(vnet_t *vnetp)
2714 {
2715 	mac_perim_handle_t	mph1;
2716 	vnet_pseudo_rx_ring_t	*rx_ringp;
2717 	vnet_pseudo_rx_group_t	*rx_grp;
2718 	vnet_pseudo_tx_group_t	*tx_grp;
2719 	vnet_pseudo_tx_ring_t	*tx_ringp;
2720 	int			i;
2721 
2722 	mac_perim_enter_by_mh(vnetp->hio_mh, &mph1);
2723 
2724 	tx_grp = &vnetp->tx_grp[0];
2725 	for (i = 0; i < VNET_NUM_HYBRID_RINGS; i++) {
2726 		tx_ringp = &tx_grp->rings[i];
2727 		if (tx_ringp->state & VNET_TXRING_HYBRID) {
2728 			tx_ringp->state &= ~VNET_TXRING_HYBRID;
2729 			tx_ringp->hw_rh = NULL;
2730 		}
2731 	}
2732 
2733 	rx_grp = &vnetp->rx_grp[0];
2734 	for (i = 0; i < VNET_NUM_HYBRID_RINGS; i++) {
2735 		rx_ringp = &rx_grp->rings[i + VNET_HYBRID_RXRING_INDEX];
2736 		if (rx_ringp->hw_rh != NULL) {
2737 			/* Stop the hwring */
2738 			mac_hwring_stop(rx_ringp->hw_rh);
2739 
2740 			/* Teardown the hwring */
2741 			mac_hwring_teardown(rx_ringp->hw_rh);
2742 			rx_ringp->hw_rh = NULL;
2743 		}
2744 	}
2745 
2746 	if (vnetp->rx_hwgh != NULL) {
2747 		vnetp->rx_hwgh = NULL;
2748 		/*
2749 		 * First clear the permanent-quiesced flag of the RX srs then
2750 		 * restart the HW ring and the mac srs on the ring.
2751 		 */
2752 		mac_srs_perm_quiesce(vnetp->hio_mch, B_FALSE);
2753 		mac_rx_client_restart(vnetp->hio_mch);
2754 	}
2755 
2756 	mac_perim_exit(mph1);
2757 }
2758 
2759 /* Bind pseudo ring to a LDC resource */
2760 static int
2761 vnet_bind_vgenring(vnet_res_t *vresp)
2762 {
2763 	vnet_t			*vnetp;
2764 	vnet_pseudo_rx_group_t	*rx_grp;
2765 	vnet_pseudo_rx_ring_t	*rx_ringp;
2766 	mac_perim_handle_t	mph1;
2767 	int			rv;
2768 	int			type;
2769 
2770 	vnetp = vresp->vnetp;
2771 	type = vresp->type;
2772 	rx_grp = &vnetp->rx_grp[0];
2773 
2774 	if (type == VIO_NET_RES_LDC_SERVICE) {
2775 		/*
2776 		 * Ring Index 0 is the default ring in the group and is
2777 		 * reserved for LDC_SERVICE in vnet_ring_grp_init(). This ring
2778 		 * is allocated statically and is reported to the mac layer
2779 		 * in vnet_m_capab(). So, all we need to do here, is save a
2780 		 * reference to the associated vresp.
2781 		 */
2782 		rx_ringp = &rx_grp->rings[0];
2783 		rx_ringp->hw_rh = (mac_ring_handle_t)vresp;
2784 		vresp->rx_ringp = (void *)rx_ringp;
2785 		return (0);
2786 	}
2787 	ASSERT(type == VIO_NET_RES_LDC_GUEST);
2788 
2789 	mac_perim_enter_by_mh(vnetp->mh, &mph1);
2790 
2791 	rx_ringp = vnet_alloc_pseudo_rx_ring(vnetp);
2792 	if (rx_ringp == NULL) {
2793 		cmn_err(CE_WARN, "!vnet%d: Failed to allocate pseudo rx ring",
2794 		    vnetp->instance);
2795 		goto fail;
2796 	}
2797 
2798 	/* Store the LDC resource itself as the ring handle */
2799 	rx_ringp->hw_rh = (mac_ring_handle_t)vresp;
2800 
2801 	/*
2802 	 * Save a reference to the ring in the resource for lookup during
2803 	 * unbind. Note this is only done for LDC resources. We don't need this
2804 	 * in the case of a Hybrid resource (see vnet_bind_hwrings()), as its
2805 	 * rx rings are mapped to reserved pseudo rx rings (index 1 and 2).
2806 	 */
2807 	vresp->rx_ringp = (void *)rx_ringp;
2808 	rx_ringp->state |= VNET_RXRING_LDC_GUEST;
2809 
2810 	/* Register the pseudo ring with upper-mac */
2811 	rv = mac_group_add_ring(rx_grp->handle, rx_ringp->index);
2812 	if (rv != 0) {
2813 		rx_ringp->state &= ~VNET_RXRING_LDC_GUEST;
2814 		rx_ringp->hw_rh = NULL;
2815 		vnet_free_pseudo_rx_ring(vnetp, rx_ringp);
2816 		goto fail;
2817 	}
2818 
2819 	mac_perim_exit(mph1);
2820 	return (0);
2821 fail:
2822 	mac_perim_exit(mph1);
2823 	return (1);
2824 }
2825 
2826 /* Unbind pseudo ring from a LDC resource */
2827 static void
2828 vnet_unbind_vgenring(vnet_res_t *vresp)
2829 {
2830 	vnet_t			*vnetp;
2831 	vnet_pseudo_rx_group_t	*rx_grp;
2832 	vnet_pseudo_rx_ring_t	*rx_ringp;
2833 	mac_perim_handle_t	mph1;
2834 	int			type;
2835 
2836 	vnetp = vresp->vnetp;
2837 	type = vresp->type;
2838 	rx_grp = &vnetp->rx_grp[0];
2839 
2840 	if (vresp->rx_ringp == NULL) {
2841 		return;
2842 	}
2843 
2844 	if (type == VIO_NET_RES_LDC_SERVICE) {
2845 		/*
2846 		 * Ring Index 0 is the default ring in the group and is
2847 		 * reserved for LDC_SERVICE in vnet_ring_grp_init(). This ring
2848 		 * is allocated statically and is reported to the mac layer
2849 		 * in vnet_m_capab(). So, all we need to do here, is remove its
2850 		 * reference to the associated vresp.
2851 		 */
2852 		rx_ringp = &rx_grp->rings[0];
2853 		rx_ringp->hw_rh = NULL;
2854 		vresp->rx_ringp = NULL;
2855 		return;
2856 	}
2857 	ASSERT(type == VIO_NET_RES_LDC_GUEST);
2858 
2859 	mac_perim_enter_by_mh(vnetp->mh, &mph1);
2860 
2861 	rx_ringp = (vnet_pseudo_rx_ring_t *)vresp->rx_ringp;
2862 	vresp->rx_ringp = NULL;
2863 
2864 	if (rx_ringp != NULL && (rx_ringp->state & VNET_RXRING_LDC_GUEST)) {
2865 		/* Unregister the pseudo ring with upper-mac */
2866 		mac_group_rem_ring(rx_grp->handle, rx_ringp->handle);
2867 
2868 		rx_ringp->hw_rh = NULL;
2869 		rx_ringp->state &= ~VNET_RXRING_LDC_GUEST;
2870 
2871 		/* Free the pseudo rx ring */
2872 		vnet_free_pseudo_rx_ring(vnetp, rx_ringp);
2873 	}
2874 
2875 	mac_perim_exit(mph1);
2876 }
2877 
2878 static void
2879 vnet_unbind_rings(vnet_res_t *vresp)
2880 {
2881 	switch (vresp->type) {
2882 
2883 	case VIO_NET_RES_LDC_SERVICE:
2884 	case VIO_NET_RES_LDC_GUEST:
2885 		vnet_unbind_vgenring(vresp);
2886 		break;
2887 
2888 	case VIO_NET_RES_HYBRID:
2889 		vnet_unbind_hwrings(vresp->vnetp);
2890 		break;
2891 
2892 	default:
2893 		break;
2894 
2895 	}
2896 }
2897 
2898 static int
2899 vnet_bind_rings(vnet_res_t *vresp)
2900 {
2901 	int	rv;
2902 
2903 	switch (vresp->type) {
2904 
2905 	case VIO_NET_RES_LDC_SERVICE:
2906 	case VIO_NET_RES_LDC_GUEST:
2907 		rv = vnet_bind_vgenring(vresp);
2908 		break;
2909 
2910 	case VIO_NET_RES_HYBRID:
2911 		rv = vnet_bind_hwrings(vresp->vnetp);
2912 		break;
2913 
2914 	default:
2915 		rv = 1;
2916 		break;
2917 
2918 	}
2919 
2920 	return (rv);
2921 }
2922 
2923 /* ARGSUSED */
2924 int
2925 vnet_hio_stat(void *arg, uint_t stat, uint64_t *val)
2926 {
2927 	vnet_t	*vnetp = (vnet_t *)arg;
2928 
2929 	*val = mac_stat_get(vnetp->hio_mh, stat);
2930 	return (0);
2931 }
2932 
/*
 * The start() and stop() routines of the Hybrid resource are dummy
 * functions. They exist so that vnet_start_resources() and
 * vnet_stop_resources() do not need resource type specific code. The Hybrid
 * resource is really started and stopped in the context of the mac_client
 * interfaces that are invoked in vnet_hio_mac_init() and
 * vnet_hio_mac_cleanup().
 */

/*
 * Dummy start routine for the Hybrid resource; does nothing and always
 * reports success.
 */
/* ARGSUSED */
static int
vnet_hio_start(void *arg)
{
	return (0);
}
2946 
/*
 * Dummy stop routine for the Hybrid resource; intentionally does nothing.
 */
/* ARGSUSED */
static void
vnet_hio_stop(void *arg)
{
	/* Nothing to do */
}
2952 
2953 mblk_t *
2954 vnet_hio_tx(void *arg, mblk_t *mp)
2955 {
2956 	vnet_pseudo_tx_ring_t	*tx_ringp;
2957 	mblk_t			*nextp;
2958 	mblk_t			*ret_mp;
2959 
2960 	tx_ringp = (vnet_pseudo_tx_ring_t *)arg;
2961 	for (;;) {
2962 		nextp = mp->b_next;
2963 		mp->b_next = NULL;
2964 
2965 		ret_mp = mac_hwring_tx(tx_ringp->hw_rh, mp);
2966 		if (ret_mp != NULL) {
2967 			ret_mp->b_next = nextp;
2968 			mp = ret_mp;
2969 			break;
2970 		}
2971 
2972 		if ((mp = nextp) == NULL)
2973 			break;
2974 	}
2975 	return (mp);
2976 }
2977 
2978 static void
2979 vnet_hio_notify_cb(void *arg, mac_notify_type_t type)
2980 {
2981 	vnet_t			*vnetp = (vnet_t *)arg;
2982 	mac_perim_handle_t	mph;
2983 
2984 	mac_perim_enter_by_mh(vnetp->hio_mh, &mph);
2985 	switch (type) {
2986 	case MAC_NOTE_TX:
2987 		vnet_tx_update(vnetp->hio_vhp);
2988 		break;
2989 
2990 	default:
2991 		break;
2992 	}
2993 	mac_perim_exit(mph);
2994 }
2995 
2996 #ifdef	VNET_IOC_DEBUG
2997 
2998 /*
2999  * The ioctl entry point is used only for debugging for now. The ioctl commands
3000  * can be used to force the link state of the channel connected to vsw.
3001  */
3002 static void
3003 vnet_m_ioctl(void *arg, queue_t *q, mblk_t *mp)
3004 {
3005 	struct iocblk	*iocp;
3006 	vnet_t		*vnetp;
3007 
3008 	iocp = (struct iocblk *)(uintptr_t)mp->b_rptr;
3009 	iocp->ioc_error = 0;
3010 	vnetp = (vnet_t *)arg;
3011 
3012 	if (vnetp == NULL) {
3013 		miocnak(q, mp, 0, EINVAL);
3014 		return;
3015 	}
3016 
3017 	switch (iocp->ioc_cmd) {
3018 
3019 	case VNET_FORCE_LINK_DOWN:
3020 	case VNET_FORCE_LINK_UP:
3021 		vnet_force_link_state(vnetp, q, mp);
3022 		break;
3023 
3024 	default:
3025 		iocp->ioc_error = EINVAL;
3026 		miocnak(q, mp, 0, iocp->ioc_error);
3027 		break;
3028 
3029 	}
3030 }
3031 
3032 static void
3033 vnet_force_link_state(vnet_t *vnetp, queue_t *q, mblk_t *mp)
3034 {
3035 	mac_register_t	*macp;
3036 	mac_callbacks_t	*cbp;
3037 	vnet_res_t	*vresp;
3038 
3039 	READ_ENTER(&vnetp->vsw_fp_rw);
3040 
3041 	vresp = vnetp->vsw_fp;
3042 	if (vresp == NULL) {
3043 		RW_EXIT(&vnetp->vsw_fp_rw);
3044 		return;
3045 	}
3046 
3047 	macp = &vresp->macreg;
3048 	cbp = macp->m_callbacks;
3049 	cbp->mc_ioctl(macp->m_driver, q, mp);
3050 
3051 	RW_EXIT(&vnetp->vsw_fp_rw);
3052 }
3053 
3054 #else
3055 
3056 static void
3057 vnet_m_ioctl(void *arg, queue_t *q, mblk_t *mp)
3058 {
3059 	vnet_t		*vnetp;
3060 
3061 	vnetp = (vnet_t *)arg;
3062 
3063 	if (vnetp == NULL) {
3064 		miocnak(q, mp, 0, EINVAL);
3065 		return;
3066 	}
3067 
3068 	/* ioctl support only for debugging */
3069 	miocnak(q, mp, 0, ENOTSUP);
3070 }
3071 
3072 #endif
3073