xref: /titanic_52/usr/src/uts/sun4v/io/vnet.c (revision efd31e1d839d4665462b5c267a1c654548082663)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #include <sys/types.h>
28 #include <sys/errno.h>
29 #include <sys/param.h>
30 #include <sys/callb.h>
31 #include <sys/stream.h>
32 #include <sys/kmem.h>
33 #include <sys/conf.h>
34 #include <sys/devops.h>
35 #include <sys/ksynch.h>
36 #include <sys/stat.h>
37 #include <sys/modctl.h>
38 #include <sys/modhash.h>
39 #include <sys/debug.h>
40 #include <sys/ethernet.h>
41 #include <sys/dlpi.h>
42 #include <net/if.h>
43 #include <sys/mac_provider.h>
44 #include <sys/mac_client.h>
45 #include <sys/mac_client_priv.h>
46 #include <sys/mac_ether.h>
47 #include <sys/ddi.h>
48 #include <sys/sunddi.h>
49 #include <sys/strsun.h>
50 #include <sys/note.h>
51 #include <sys/atomic.h>
52 #include <sys/vnet.h>
53 #include <sys/vlan.h>
54 #include <sys/vnet_mailbox.h>
55 #include <sys/vnet_common.h>
56 #include <sys/dds.h>
57 #include <sys/strsubr.h>
58 #include <sys/taskq.h>
59 
60 /*
61  * Function prototypes.
62  */
63 
64 /* DDI entrypoints */
65 static int vnetdevinfo(dev_info_t *, ddi_info_cmd_t, void *, void **);
66 static int vnetattach(dev_info_t *, ddi_attach_cmd_t);
67 static int vnetdetach(dev_info_t *, ddi_detach_cmd_t);
68 
69 /* MAC entrypoints  */
70 static int vnet_m_stat(void *, uint_t, uint64_t *);
71 static int vnet_m_start(void *);
72 static void vnet_m_stop(void *);
73 static int vnet_m_promisc(void *, boolean_t);
74 static int vnet_m_multicst(void *, boolean_t, const uint8_t *);
75 static int vnet_m_unicst(void *, const uint8_t *);
76 mblk_t *vnet_m_tx(void *, mblk_t *);
77 static void vnet_m_ioctl(void *arg, queue_t *q, mblk_t *mp);
78 #ifdef	VNET_IOC_DEBUG
79 static void vnet_force_link_state(vnet_t *vnetp, queue_t *q, mblk_t *mp);
80 #endif
81 static boolean_t vnet_m_capab(void *arg, mac_capab_t cap, void *cap_data);
82 static void vnet_get_ring(void *arg, mac_ring_type_t rtype, const int g_index,
83 	const int r_index, mac_ring_info_t *infop, mac_ring_handle_t r_handle);
84 static void vnet_get_group(void *arg, mac_ring_type_t type, const int index,
85 	mac_group_info_t *infop, mac_group_handle_t handle);
86 static int vnet_rx_ring_start(mac_ring_driver_t rdriver, uint64_t mr_gen_num);
87 static void vnet_rx_ring_stop(mac_ring_driver_t rdriver);
88 static int vnet_rx_ring_stat(mac_ring_driver_t rdriver, uint_t stat,
89 	uint64_t *val);
90 static int vnet_tx_ring_start(mac_ring_driver_t rdriver, uint64_t mr_gen_num);
91 static void vnet_tx_ring_stop(mac_ring_driver_t rdriver);
92 static int vnet_tx_ring_stat(mac_ring_driver_t rdriver, uint_t stat,
93 	uint64_t *val);
94 static int vnet_ring_enable_intr(void *arg);
95 static int vnet_ring_disable_intr(void *arg);
96 static mblk_t *vnet_rx_poll(void *arg, int bytes_to_pickup);
97 static int vnet_addmac(void *arg, const uint8_t *mac_addr);
98 static int vnet_remmac(void *arg, const uint8_t *mac_addr);
99 
100 /* vnet internal functions */
101 static int vnet_unattach(vnet_t *vnetp);
102 static void vnet_ring_grp_init(vnet_t *vnetp);
103 static void vnet_ring_grp_uninit(vnet_t *vnetp);
104 static int vnet_mac_register(vnet_t *);
105 static int vnet_read_mac_address(vnet_t *vnetp);
106 static int vnet_bind_vgenring(vnet_res_t *vresp);
107 static void vnet_unbind_vgenring(vnet_res_t *vresp);
108 static int vnet_bind_hwrings(vnet_t *vnetp);
109 static void vnet_unbind_hwrings(vnet_t *vnetp);
110 static int vnet_bind_rings(vnet_res_t *vresp);
111 static void vnet_unbind_rings(vnet_res_t *vresp);
112 static int vnet_hio_stat(void *, uint_t, uint64_t *);
113 static int vnet_hio_start(void *);
114 static void vnet_hio_stop(void *);
115 mblk_t *vnet_hio_tx(void *, mblk_t *);
116 
117 /* Forwarding database (FDB) routines */
118 static void vnet_fdb_create(vnet_t *vnetp);
119 static void vnet_fdb_destroy(vnet_t *vnetp);
120 static vnet_res_t *vnet_fdbe_find(vnet_t *vnetp, struct ether_addr *addrp);
121 static void vnet_fdbe_find_cb(mod_hash_key_t key, mod_hash_val_t val);
122 void vnet_fdbe_add(vnet_t *vnetp, vnet_res_t *vresp);
123 static void vnet_fdbe_del(vnet_t *vnetp, vnet_res_t *vresp);
124 
125 static void vnet_rx_frames_untag(uint16_t pvid, mblk_t **mp);
126 static void vnet_rx(vio_net_handle_t vrh, mblk_t *mp);
127 static void vnet_tx_update(vio_net_handle_t vrh);
128 static void vnet_res_start_task(void *arg);
129 static void vnet_start_resources(vnet_t *vnetp);
130 static void vnet_stop_resources(vnet_t *vnetp);
131 static void vnet_dispatch_res_task(vnet_t *vnetp);
132 static void vnet_res_start_task(void *arg);
133 static void vnet_handle_res_err(vio_net_handle_t vrh, vio_net_err_val_t err);
134 static void vnet_add_resource(vnet_t *vnetp, vnet_res_t *vresp);
135 static vnet_res_t *vnet_rem_resource(vnet_t *vnetp, vnet_res_t *vresp);
136 static void vnet_tx_notify_thread(void *);
137 
138 /* Exported to vnet_gen */
139 int vnet_mtu_update(vnet_t *vnetp, uint32_t mtu);
140 void vnet_link_update(vnet_t *vnetp, link_state_t link_state);
141 void vnet_dds_cleanup_hio(vnet_t *vnetp);
142 
143 static kstat_t *vnet_hio_setup_kstats(char *ks_mod, char *ks_name,
144     vnet_res_t *vresp);
145 static int vnet_hio_update_kstats(kstat_t *ksp, int rw);
146 static void vnet_hio_get_stats(vnet_res_t *vresp, vnet_hio_stats_t *statsp);
147 static void vnet_hio_destroy_kstats(kstat_t *ksp);
148 
/* Exported to vnet_dds */
150 int vnet_send_dds_msg(vnet_t *vnetp, void *dmsg);
151 int vnet_hio_mac_init(vnet_t *vnetp, char *ifname);
152 void vnet_hio_mac_cleanup(vnet_t *vnetp);
153 
154 /* Externs that are imported from vnet_gen */
155 extern int vgen_init(void *vnetp, uint64_t regprop, dev_info_t *vnetdip,
156     const uint8_t *macaddr, void **vgenhdl);
157 extern int vgen_init_mdeg(void *arg);
158 extern void vgen_uninit(void *arg);
159 extern int vgen_dds_tx(void *arg, void *dmsg);
160 extern int vgen_enable_intr(void *arg);
161 extern int vgen_disable_intr(void *arg);
162 extern mblk_t *vgen_rx_poll(void *arg, int bytes_to_pickup);
163 
164 /* Externs that are imported from vnet_dds */
165 extern void vdds_mod_init(void);
166 extern void vdds_mod_fini(void);
167 extern int vdds_init(vnet_t *vnetp);
168 extern void vdds_cleanup(vnet_t *vnetp);
169 extern void vdds_process_dds_msg(vnet_t *vnetp, vio_dds_msg_t *dmsg);
170 extern void vdds_cleanup_hybrid_res(void *arg);
171 extern void vdds_cleanup_hio(vnet_t *vnetp);
172 
173 extern pri_t	minclsyspri;
174 
175 #define	DRV_NAME	"vnet"
/*
 * Take a reference on a forwarding database entry: atomically increment
 * its refcnt. The ASSERT that follows checks (on DEBUG kernels) that the
 * counter did not end up as zero after the increment.
 * Expands to a brace-enclosed block; invoke it as a statement.
 */
#define	VNET_FDBE_REFHOLD(p)						\
{									\
	atomic_inc_32(&(p)->refcnt);					\
	ASSERT((p)->refcnt != 0);					\
}

/*
 * Drop a reference on a forwarding database entry: atomically decrement
 * its refcnt. The ASSERT checks (on DEBUG kernels) that a reference was
 * actually outstanding before the decrement.
 * Expands to a brace-enclosed block; invoke it as a statement.
 */
#define	VNET_FDBE_REFRELE(p)						\
{									\
	ASSERT((p)->refcnt != 0);					\
	atomic_dec_32(&(p)->refcnt);					\
}
187 
188 #ifdef	VNET_IOC_DEBUG
189 #define	VNET_M_CALLBACK_FLAGS	(MC_IOCTL | MC_GETCAPAB)
190 #else
191 #define	VNET_M_CALLBACK_FLAGS	(MC_GETCAPAB)
192 #endif
193 
/*
 * MAC callback vector for the vnet device itself; vnet_mac_register()
 * installs it through macp->m_callbacks. The entries are positional.
 * VNET_M_CALLBACK_FLAGS advertises MC_GETCAPAB always and MC_IOCTL only
 * when VNET_IOC_DEBUG is defined, even though vnet_m_ioctl is listed
 * unconditionally below.
 */
static mac_callbacks_t vnet_m_callbacks = {
	VNET_M_CALLBACK_FLAGS,
	vnet_m_stat,
	vnet_m_start,
	vnet_m_stop,
	vnet_m_promisc,
	vnet_m_multicst,
	NULL,	/* m_unicst entry must be NULL while rx rings are exposed */
	NULL,	/* m_tx entry must be NULL while tx rings are exposed */
	NULL,
	vnet_m_ioctl,
	vnet_m_capab,
	NULL
};
208 
/*
 * Callback vector built from the vnet_hio_* routines (stat, start, stop
 * and tx). No optional-callback flags are set (first entry is 0) and all
 * remaining positional entries are left NULL.
 * NOTE(review): presumably used when registering a Hybrid I/O resource;
 * the user of this table is not in this part of the file - confirm.
 */
static mac_callbacks_t vnet_hio_res_callbacks = {
	0,
	vnet_hio_stat,
	vnet_hio_start,
	vnet_hio_stop,
	NULL,
	NULL,
	NULL,
	vnet_hio_tx,
	NULL,
	NULL,
	NULL
};
222 
223 /*
224  * Linked list of "vnet_t" structures - one per instance.
225  */
226 static vnet_t	*vnet_headp = NULL;
227 static krwlock_t vnet_rw;
228 
229 /* Tunables */
230 uint32_t vnet_num_descriptors = VNET_NUM_DESCRIPTORS;
231 
232 /*
233  * Configure tx serialization in mac layer for the vnet device. This tunable
234  * should be enabled to improve performance only if HybridIO is configured for
235  * the vnet device.
236  */
237 boolean_t vnet_mac_tx_serialize = B_FALSE;
238 
/* Configure enqueuing at Rx soft rings in mac layer for the vnet device */
240 boolean_t vnet_mac_rx_queuing = B_TRUE;
241 
242 /*
243  * Set this to non-zero to enable additional internal receive buffer pools
244  * based on the MTU of the device for better performance at the cost of more
245  * memory consumption. This is turned off by default, to use allocb(9F) for
246  * receive buffer allocations of sizes > 2K.
247  */
248 boolean_t vnet_jumbo_rxpools = B_FALSE;
249 
250 /* # of chains in fdb hash table */
251 uint32_t	vnet_fdb_nchains = VNET_NFDB_HASH;
252 
253 /* Internal tunables */
254 uint32_t	vnet_ethermtu = 1500;	/* mtu of the device */
255 
256 /*
257  * Default vlan id. This is only used internally when the "default-vlan-id"
258  * property is not present in the MD device node. Therefore, this should not be
259  * used as a tunable; if this value is changed, the corresponding variable
260  * should be updated to the same value in vsw and also other vnets connected to
261  * the same vsw.
262  */
263 uint16_t	vnet_default_vlan_id = 1;
264 
265 /* delay in usec to wait for all references on a fdb entry to be dropped */
266 uint32_t vnet_fdbe_refcnt_delay = 10;
267 
268 static struct ether_addr etherbroadcastaddr = {
269 	0xff, 0xff, 0xff, 0xff, 0xff, 0xff
270 };
271 
272 /* mac_open() retry delay in usec */
273 uint32_t vnet_mac_open_delay = 100;	/* 0.1 ms */
274 
275 /* max # of mac_open() retries */
276 uint32_t vnet_mac_open_retries = 100;
277 
278 /*
279  * Property names
280  */
281 static char macaddr_propname[] = "local-mac-address";
282 
283 /*
284  * This is the string displayed by modinfo(1m).
285  */
286 static char vnet_ident[] = "vnet driver";
287 extern struct mod_ops mod_driverops;
288 static struct cb_ops cb_vnetops = {
289 	nulldev,		/* cb_open */
290 	nulldev,		/* cb_close */
291 	nodev,			/* cb_strategy */
292 	nodev,			/* cb_print */
293 	nodev,			/* cb_dump */
294 	nodev,			/* cb_read */
295 	nodev,			/* cb_write */
296 	nodev,			/* cb_ioctl */
297 	nodev,			/* cb_devmap */
298 	nodev,			/* cb_mmap */
299 	nodev,			/* cb_segmap */
300 	nochpoll,		/* cb_chpoll */
301 	ddi_prop_op,		/* cb_prop_op */
302 	NULL,			/* cb_stream */
303 	(int)(D_MP)		/* cb_flag */
304 };
305 
306 static struct dev_ops vnetops = {
307 	DEVO_REV,		/* devo_rev */
308 	0,			/* devo_refcnt */
309 	NULL,			/* devo_getinfo */
310 	nulldev,		/* devo_identify */
311 	nulldev,		/* devo_probe */
312 	vnetattach,		/* devo_attach */
313 	vnetdetach,		/* devo_detach */
314 	nodev,			/* devo_reset */
315 	&cb_vnetops,		/* devo_cb_ops */
316 	(struct bus_ops *)NULL,	/* devo_bus_ops */
317 	NULL,			/* devo_power */
318 	ddi_quiesce_not_supported,	/* devo_quiesce */
319 };
320 
321 static struct modldrv modldrv = {
322 	&mod_driverops,		/* Type of module.  This one is a driver */
323 	vnet_ident,		/* ID string */
324 	&vnetops		/* driver specific ops */
325 };
326 
327 static struct modlinkage modlinkage = {
328 	MODREV_1, (void *)&modldrv, NULL
329 };
330 
331 #ifdef DEBUG
332 
333 #define	DEBUG_PRINTF	debug_printf
334 
335 /*
336  * Print debug messages - set to 0xf to enable all msgs
337  */
338 int vnet_dbglevel = 0x8;
339 
340 static void
341 debug_printf(const char *fname, void *arg, const char *fmt, ...)
342 {
343 	char    buf[512];
344 	va_list ap;
345 	vnet_t *vnetp = (vnet_t *)arg;
346 	char    *bufp = buf;
347 
348 	if (vnetp == NULL) {
349 		(void) sprintf(bufp, "%s: ", fname);
350 		bufp += strlen(bufp);
351 	} else {
352 		(void) sprintf(bufp, "vnet%d:%s: ", vnetp->instance, fname);
353 		bufp += strlen(bufp);
354 	}
355 	va_start(ap, fmt);
356 	(void) vsprintf(bufp, fmt, ap);
357 	va_end(ap);
358 	cmn_err(CE_CONT, "%s\n", buf);
359 }
360 
361 #endif
362 
363 /* _init(9E): initialize the loadable module */
364 int
365 _init(void)
366 {
367 	int status;
368 
369 	DBG1(NULL, "enter\n");
370 
371 	mac_init_ops(&vnetops, "vnet");
372 	status = mod_install(&modlinkage);
373 	if (status != 0) {
374 		mac_fini_ops(&vnetops);
375 	}
376 	vdds_mod_init();
377 	DBG1(NULL, "exit(%d)\n", status);
378 	return (status);
379 }
380 
381 /* _fini(9E): prepare the module for unloading. */
382 int
383 _fini(void)
384 {
385 	int		status;
386 
387 	DBG1(NULL, "enter\n");
388 
389 	status = mod_remove(&modlinkage);
390 	if (status != 0)
391 		return (status);
392 	mac_fini_ops(&vnetops);
393 	vdds_mod_fini();
394 
395 	DBG1(NULL, "exit(%d)\n", status);
396 	return (status);
397 }
398 
399 /* _info(9E): return information about the loadable module */
400 int
401 _info(struct modinfo *modinfop)
402 {
403 	return (mod_info(&modlinkage, modinfop));
404 }
405 
406 /*
407  * attach(9E): attach a device to the system.
408  * called once for each instance of the device on the system.
409  */
410 static int
411 vnetattach(dev_info_t *dip, ddi_attach_cmd_t cmd)
412 {
413 	vnet_t			*vnetp;
414 	int			status;
415 	int			instance;
416 	uint64_t		reg;
417 	char			qname[TASKQ_NAMELEN];
418 	vnet_attach_progress_t	attach_progress;
419 
420 	attach_progress = AST_init;
421 
422 	switch (cmd) {
423 	case DDI_ATTACH:
424 		break;
425 	case DDI_RESUME:
426 	case DDI_PM_RESUME:
427 	default:
428 		goto vnet_attach_fail;
429 	}
430 
431 	instance = ddi_get_instance(dip);
432 	DBG1(NULL, "instance(%d) enter\n", instance);
433 
434 	/* allocate vnet_t and mac_t structures */
435 	vnetp = kmem_zalloc(sizeof (vnet_t), KM_SLEEP);
436 	vnetp->dip = dip;
437 	vnetp->instance = instance;
438 	rw_init(&vnetp->vrwlock, NULL, RW_DRIVER, NULL);
439 	rw_init(&vnetp->vsw_fp_rw, NULL, RW_DRIVER, NULL);
440 	attach_progress |= AST_vnet_alloc;
441 
442 	vnet_ring_grp_init(vnetp);
443 	attach_progress |= AST_ring_init;
444 
445 	status = vdds_init(vnetp);
446 	if (status != 0) {
447 		goto vnet_attach_fail;
448 	}
449 	attach_progress |= AST_vdds_init;
450 
451 	/* setup links to vnet_t from both devinfo and mac_t */
452 	ddi_set_driver_private(dip, (caddr_t)vnetp);
453 
454 	/* read the mac address */
455 	status = vnet_read_mac_address(vnetp);
456 	if (status != DDI_SUCCESS) {
457 		goto vnet_attach_fail;
458 	}
459 	attach_progress |= AST_read_macaddr;
460 
461 	reg = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
462 	    DDI_PROP_DONTPASS, "reg", -1);
463 	if (reg == -1) {
464 		goto vnet_attach_fail;
465 	}
466 	vnetp->reg = reg;
467 
468 	vnet_fdb_create(vnetp);
469 	attach_progress |= AST_fdbh_alloc;
470 
471 	(void) snprintf(qname, TASKQ_NAMELEN, "vres_taskq%d", instance);
472 	if ((vnetp->taskqp = ddi_taskq_create(dip, qname, 1,
473 	    TASKQ_DEFAULTPRI, 0)) == NULL) {
474 		cmn_err(CE_WARN, "!vnet%d: Unable to create task queue",
475 		    instance);
476 		goto vnet_attach_fail;
477 	}
478 	attach_progress |= AST_taskq_create;
479 
480 	/* add to the list of vnet devices */
481 	WRITE_ENTER(&vnet_rw);
482 	vnetp->nextp = vnet_headp;
483 	vnet_headp = vnetp;
484 	RW_EXIT(&vnet_rw);
485 
486 	attach_progress |= AST_vnet_list;
487 
488 	/*
489 	 * Initialize the generic vnet plugin which provides communication via
490 	 * sun4v LDC (logical domain channel) based resources. This involves 2
491 	 * steps; first, vgen_init() is invoked to read the various properties
492 	 * of the vnet device from its MD node (including its mtu which is
493 	 * needed to mac_register()) and obtain a handle to the vgen layer.
494 	 * After mac_register() is done and we have a mac handle, we then
495 	 * invoke vgen_init_mdeg() which registers with the the MD event
496 	 * generator (mdeg) framework to allow LDC resource notifications.
497 	 * Note: this sequence also allows us to report the correct default #
498 	 * of pseudo rings (2TX and 3RX) in vnet_m_capab() which gets invoked
499 	 * in the context of mac_register(); and avoids conflicting with
500 	 * dynamic pseudo rx rings which get added/removed as a result of mdeg
501 	 * events in vgen.
502 	 */
503 	status = vgen_init(vnetp, reg, vnetp->dip,
504 	    (uint8_t *)vnetp->curr_macaddr, &vnetp->vgenhdl);
505 	if (status != DDI_SUCCESS) {
506 		DERR(vnetp, "vgen_init() failed\n");
507 		goto vnet_attach_fail;
508 	}
509 	attach_progress |= AST_vgen_init;
510 
511 	status = vnet_mac_register(vnetp);
512 	if (status != DDI_SUCCESS) {
513 		goto vnet_attach_fail;
514 	}
515 	vnetp->link_state = LINK_STATE_UNKNOWN;
516 	attach_progress |= AST_macreg;
517 
518 	status = vgen_init_mdeg(vnetp->vgenhdl);
519 	if (status != DDI_SUCCESS) {
520 		goto vnet_attach_fail;
521 	}
522 	attach_progress |= AST_init_mdeg;
523 
524 	vnetp->attach_progress = attach_progress;
525 
526 	DBG1(NULL, "instance(%d) exit\n", instance);
527 	return (DDI_SUCCESS);
528 
529 vnet_attach_fail:
530 	vnetp->attach_progress = attach_progress;
531 	status = vnet_unattach(vnetp);
532 	ASSERT(status == 0);
533 	return (DDI_FAILURE);
534 }
535 
536 /*
537  * detach(9E): detach a device from the system.
538  */
539 static int
540 vnetdetach(dev_info_t *dip, ddi_detach_cmd_t cmd)
541 {
542 	vnet_t		*vnetp;
543 	int		instance;
544 
545 	instance = ddi_get_instance(dip);
546 	DBG1(NULL, "instance(%d) enter\n", instance);
547 
548 	vnetp = ddi_get_driver_private(dip);
549 	if (vnetp == NULL) {
550 		goto vnet_detach_fail;
551 	}
552 
553 	switch (cmd) {
554 	case DDI_DETACH:
555 		break;
556 	case DDI_SUSPEND:
557 	case DDI_PM_SUSPEND:
558 	default:
559 		goto vnet_detach_fail;
560 	}
561 
562 	if (vnet_unattach(vnetp) != 0) {
563 		goto vnet_detach_fail;
564 	}
565 
566 	return (DDI_SUCCESS);
567 
568 vnet_detach_fail:
569 	return (DDI_FAILURE);
570 }
571 
572 /*
573  * Common routine to handle vnetattach() failure and vnetdetach(). Note that
574  * the only reason this function could fail is if mac_unregister() fails.
575  * Otherwise, this function must ensure that all resources are freed and return
576  * success.
577  */
578 static int
579 vnet_unattach(vnet_t *vnetp)
580 {
581 	vnet_attach_progress_t	attach_progress;
582 
583 	attach_progress = vnetp->attach_progress;
584 
585 	/*
586 	 * Disable the mac device in the gldv3 subsystem. This can fail, in
587 	 * particular if there are still any open references to this mac
588 	 * device; in which case we just return failure without continuing to
589 	 * detach further.
590 	 * If it succeeds, we then invoke vgen_uninit() which should unregister
591 	 * any pseudo rings registered with the mac layer. Note we keep the
592 	 * AST_macreg flag on, so we can unregister with the mac layer at
593 	 * the end of this routine.
594 	 */
595 	if (attach_progress & AST_macreg) {
596 		if (mac_disable(vnetp->mh) != 0) {
597 			return (1);
598 		}
599 	}
600 
601 	/*
602 	 * Now that we have disabled the device, we must finish all other steps
603 	 * and successfully return from this function; otherwise we will end up
604 	 * leaving the device in a broken/unusable state.
605 	 *
606 	 * First, release any hybrid resources assigned to this vnet device.
607 	 */
608 	if (attach_progress & AST_vdds_init) {
609 		vdds_cleanup(vnetp);
610 		attach_progress &= ~AST_vdds_init;
611 	}
612 
613 	/*
614 	 * Uninit vgen. This stops further mdeg callbacks to this vnet
615 	 * device and/or its ports; and detaches any existing ports.
616 	 */
617 	if (attach_progress & (AST_vgen_init|AST_init_mdeg)) {
618 		vgen_uninit(vnetp->vgenhdl);
619 		attach_progress &= ~AST_vgen_init;
620 		attach_progress &= ~AST_init_mdeg;
621 	}
622 
623 	/* Destroy the taskq. */
624 	if (attach_progress & AST_taskq_create) {
625 		ddi_taskq_destroy(vnetp->taskqp);
626 		attach_progress &= ~AST_taskq_create;
627 	}
628 
629 	/* Destroy fdb. */
630 	if (attach_progress & AST_fdbh_alloc) {
631 		vnet_fdb_destroy(vnetp);
632 		attach_progress &= ~AST_fdbh_alloc;
633 	}
634 
635 	/* Remove from the device list */
636 	if (attach_progress & AST_vnet_list) {
637 		vnet_t		**vnetpp;
638 		/* unlink from instance(vnet_t) list */
639 		WRITE_ENTER(&vnet_rw);
640 		for (vnetpp = &vnet_headp; *vnetpp;
641 		    vnetpp = &(*vnetpp)->nextp) {
642 			if (*vnetpp == vnetp) {
643 				*vnetpp = vnetp->nextp;
644 				break;
645 			}
646 		}
647 		RW_EXIT(&vnet_rw);
648 		attach_progress &= ~AST_vnet_list;
649 	}
650 
651 	if (attach_progress & AST_ring_init) {
652 		vnet_ring_grp_uninit(vnetp);
653 		attach_progress &= ~AST_ring_init;
654 	}
655 
656 	if (attach_progress & AST_macreg) {
657 		VERIFY(mac_unregister(vnetp->mh) == 0);
658 		vnetp->mh = NULL;
659 		attach_progress &= ~AST_macreg;
660 	}
661 
662 	if (attach_progress & AST_vnet_alloc) {
663 		rw_destroy(&vnetp->vrwlock);
664 		rw_destroy(&vnetp->vsw_fp_rw);
665 		attach_progress &= ~AST_vnet_list;
666 		KMEM_FREE(vnetp);
667 	}
668 
669 	return (0);
670 }
671 
672 /* enable the device for transmit/receive */
673 static int
674 vnet_m_start(void *arg)
675 {
676 	vnet_t		*vnetp = arg;
677 
678 	DBG1(vnetp, "enter\n");
679 
680 	WRITE_ENTER(&vnetp->vrwlock);
681 	vnetp->flags |= VNET_STARTED;
682 	vnet_start_resources(vnetp);
683 	RW_EXIT(&vnetp->vrwlock);
684 
685 	DBG1(vnetp, "exit\n");
686 	return (VNET_SUCCESS);
687 
688 }
689 
690 /* stop transmit/receive for the device */
691 static void
692 vnet_m_stop(void *arg)
693 {
694 	vnet_t		*vnetp = arg;
695 
696 	DBG1(vnetp, "enter\n");
697 
698 	WRITE_ENTER(&vnetp->vrwlock);
699 	if (vnetp->flags & VNET_STARTED) {
700 		/*
701 		 * Set the flags appropriately; this should prevent starting of
702 		 * any new resources that are added(see vnet_res_start_task()),
703 		 * while we release the vrwlock in vnet_stop_resources() before
704 		 * stopping each resource.
705 		 */
706 		vnetp->flags &= ~VNET_STARTED;
707 		vnetp->flags |= VNET_STOPPING;
708 		vnet_stop_resources(vnetp);
709 		vnetp->flags &= ~VNET_STOPPING;
710 	}
711 	RW_EXIT(&vnetp->vrwlock);
712 
713 	DBG1(vnetp, "exit\n");
714 }
715 
716 /* set the unicast mac address of the device */
717 static int
718 vnet_m_unicst(void *arg, const uint8_t *macaddr)
719 {
720 	_NOTE(ARGUNUSED(macaddr))
721 
722 	vnet_t *vnetp = arg;
723 
724 	DBG1(vnetp, "enter\n");
725 	/*
726 	 * NOTE: setting mac address dynamically is not supported.
727 	 */
728 	DBG1(vnetp, "exit\n");
729 
730 	return (VNET_FAILURE);
731 }
732 
733 /* enable/disable a multicast address */
734 static int
735 vnet_m_multicst(void *arg, boolean_t add, const uint8_t *mca)
736 {
737 	_NOTE(ARGUNUSED(add, mca))
738 
739 	vnet_t		*vnetp = arg;
740 	vnet_res_t	*vresp;
741 	mac_register_t	*macp;
742 	mac_callbacks_t	*cbp;
743 	int		rv = VNET_SUCCESS;
744 
745 	DBG1(vnetp, "enter\n");
746 
747 	READ_ENTER(&vnetp->vsw_fp_rw);
748 	if (vnetp->vsw_fp == NULL) {
749 		RW_EXIT(&vnetp->vsw_fp_rw);
750 		return (EAGAIN);
751 	}
752 	VNET_FDBE_REFHOLD(vnetp->vsw_fp);
753 	RW_EXIT(&vnetp->vsw_fp_rw);
754 
755 	vresp = vnetp->vsw_fp;
756 	macp = &vresp->macreg;
757 	cbp = macp->m_callbacks;
758 	rv = cbp->mc_multicst(macp->m_driver, add, mca);
759 
760 	VNET_FDBE_REFRELE(vnetp->vsw_fp);
761 
762 	DBG1(vnetp, "exit(%d)\n", rv);
763 	return (rv);
764 }
765 
766 /* set or clear promiscuous mode on the device */
767 static int
768 vnet_m_promisc(void *arg, boolean_t on)
769 {
770 	_NOTE(ARGUNUSED(on))
771 
772 	vnet_t *vnetp = arg;
773 	DBG1(vnetp, "enter\n");
774 	/*
775 	 * NOTE: setting promiscuous mode is not supported, just return success.
776 	 */
777 	DBG1(vnetp, "exit\n");
778 	return (VNET_SUCCESS);
779 }
780 
781 /*
782  * Transmit a chain of packets. This function provides switching functionality
783  * based on the destination mac address to reach other guests (within ldoms) or
784  * external hosts.
785  */
786 mblk_t *
787 vnet_tx_ring_send(void *arg, mblk_t *mp)
788 {
789 	vnet_pseudo_tx_ring_t	*tx_ringp;
790 	vnet_tx_ring_stats_t	*statsp;
791 	vnet_t			*vnetp;
792 	vnet_res_t		*vresp;
793 	mblk_t			*next;
794 	mblk_t			*resid_mp;
795 	mac_register_t		*macp;
796 	struct ether_header	*ehp;
797 	boolean_t		is_unicast;
798 	boolean_t		is_pvid;	/* non-default pvid ? */
799 	boolean_t		hres;		/* Hybrid resource ? */
800 	void			*tx_arg;
801 	size_t			size;
802 
803 	tx_ringp = (vnet_pseudo_tx_ring_t *)arg;
804 	statsp = &tx_ringp->tx_ring_stats;
805 	vnetp = (vnet_t *)tx_ringp->vnetp;
806 	DBG1(vnetp, "enter\n");
807 	ASSERT(mp != NULL);
808 
809 	is_pvid = (vnetp->pvid != vnetp->default_vlan_id) ? B_TRUE : B_FALSE;
810 
811 	while (mp != NULL) {
812 
813 		next = mp->b_next;
814 		mp->b_next = NULL;
815 
816 		/* update stats */
817 		size = msgsize(mp);
818 
819 		/*
820 		 * Find fdb entry for the destination
821 		 * and hold a reference to it.
822 		 */
823 		ehp = (struct ether_header *)mp->b_rptr;
824 		vresp = vnet_fdbe_find(vnetp, &ehp->ether_dhost);
825 		if (vresp != NULL) {
826 
827 			/*
828 			 * Destination found in FDB.
829 			 * The destination is a vnet device within ldoms
830 			 * and directly reachable, invoke the tx function
831 			 * in the fdb entry.
832 			 */
833 			macp = &vresp->macreg;
834 			resid_mp = macp->m_callbacks->mc_tx(macp->m_driver, mp);
835 
836 			/* tx done; now release ref on fdb entry */
837 			VNET_FDBE_REFRELE(vresp);
838 
839 			if (resid_mp != NULL) {
840 				/* m_tx failed */
841 				mp->b_next = next;
842 				break;
843 			}
844 		} else {
845 			is_unicast = !(IS_BROADCAST(ehp) ||
846 			    (IS_MULTICAST(ehp)));
847 			/*
848 			 * Destination is not in FDB.
849 			 * If the destination is broadcast or multicast,
850 			 * then forward the packet to vswitch.
851 			 * If a Hybrid resource avilable, then send the
852 			 * unicast packet via hybrid resource, otherwise
853 			 * forward it to vswitch.
854 			 */
855 			READ_ENTER(&vnetp->vsw_fp_rw);
856 
857 			if ((is_unicast) && (vnetp->hio_fp != NULL)) {
858 				vresp = vnetp->hio_fp;
859 				hres = B_TRUE;
860 			} else {
861 				vresp = vnetp->vsw_fp;
862 				hres = B_FALSE;
863 			}
864 			if (vresp == NULL) {
865 				/*
866 				 * no fdb entry to vsw? drop the packet.
867 				 */
868 				RW_EXIT(&vnetp->vsw_fp_rw);
869 				freemsg(mp);
870 				mp = next;
871 				continue;
872 			}
873 
874 			/* ref hold the fdb entry to vsw */
875 			VNET_FDBE_REFHOLD(vresp);
876 
877 			RW_EXIT(&vnetp->vsw_fp_rw);
878 
879 			/*
880 			 * In the case of a hybrid resource we need to insert
881 			 * the tag for the pvid case here; unlike packets that
882 			 * are destined to a vnet/vsw in which case the vgen
883 			 * layer does the tagging before sending it over ldc.
884 			 */
885 			if (hres == B_TRUE) {
886 				/*
887 				 * Determine if the frame being transmitted
888 				 * over the hybrid resource is untagged. If so,
889 				 * insert the tag before transmitting.
890 				 */
891 				if (is_pvid == B_TRUE &&
892 				    ehp->ether_type != htons(ETHERTYPE_VLAN)) {
893 
894 					mp = vnet_vlan_insert_tag(mp,
895 					    vnetp->pvid);
896 					if (mp == NULL) {
897 						VNET_FDBE_REFRELE(vresp);
898 						mp = next;
899 						continue;
900 					}
901 
902 				}
903 
904 				macp = &vresp->macreg;
905 				tx_arg = tx_ringp;
906 			} else {
907 				macp = &vresp->macreg;
908 				tx_arg = macp->m_driver;
909 			}
910 			resid_mp = macp->m_callbacks->mc_tx(tx_arg, mp);
911 
912 			/* tx done; now release ref on fdb entry */
913 			VNET_FDBE_REFRELE(vresp);
914 
915 			if (resid_mp != NULL) {
916 				/* m_tx failed */
917 				mp->b_next = next;
918 				break;
919 			}
920 		}
921 
922 		statsp->obytes += size;
923 		statsp->opackets++;
924 		mp = next;
925 	}
926 
927 	DBG1(vnetp, "exit\n");
928 	return (mp);
929 }
930 
931 /* get statistics from the device */
932 int
933 vnet_m_stat(void *arg, uint_t stat, uint64_t *val)
934 {
935 	vnet_t *vnetp = arg;
936 	vnet_res_t	*vresp;
937 	mac_register_t	*macp;
938 	mac_callbacks_t	*cbp;
939 	uint64_t val_total = 0;
940 
941 	DBG1(vnetp, "enter\n");
942 
943 	/*
944 	 * get the specified statistic from each transport and return the
945 	 * aggregate val.  This obviously only works for counters.
946 	 */
947 	if ((IS_MAC_STAT(stat) && !MAC_STAT_ISACOUNTER(stat)) ||
948 	    (IS_MACTYPE_STAT(stat) && !ETHER_STAT_ISACOUNTER(stat))) {
949 		return (ENOTSUP);
950 	}
951 
952 	READ_ENTER(&vnetp->vrwlock);
953 	for (vresp = vnetp->vres_list; vresp != NULL; vresp = vresp->nextp) {
954 		macp = &vresp->macreg;
955 		cbp = macp->m_callbacks;
956 		if (cbp->mc_getstat(macp->m_driver, stat, val) == 0)
957 			val_total += *val;
958 	}
959 	RW_EXIT(&vnetp->vrwlock);
960 
961 	*val = val_total;
962 
963 	DBG1(vnetp, "exit\n");
964 	return (0);
965 }
966 
967 static void
968 vnet_ring_grp_init(vnet_t *vnetp)
969 {
970 	vnet_pseudo_rx_group_t	*rx_grp;
971 	vnet_pseudo_rx_ring_t	*rx_ringp;
972 	vnet_pseudo_tx_group_t	*tx_grp;
973 	vnet_pseudo_tx_ring_t	*tx_ringp;
974 	int			i;
975 
976 	tx_grp = &vnetp->tx_grp[0];
977 	tx_ringp = kmem_zalloc(sizeof (vnet_pseudo_tx_ring_t) *
978 	    VNET_NUM_PSEUDO_TXRINGS, KM_SLEEP);
979 	for (i = 0; i < VNET_NUM_PSEUDO_TXRINGS; i++) {
980 		tx_ringp[i].state |= VNET_TXRING_SHARED;
981 	}
982 	tx_grp->rings = tx_ringp;
983 	tx_grp->ring_cnt = VNET_NUM_PSEUDO_TXRINGS;
984 	mutex_init(&tx_grp->flowctl_lock, NULL, MUTEX_DRIVER, NULL);
985 	cv_init(&tx_grp->flowctl_cv, NULL, CV_DRIVER, NULL);
986 	tx_grp->flowctl_thread = thread_create(NULL, 0,
987 	    vnet_tx_notify_thread, tx_grp, 0, &p0, TS_RUN, minclsyspri);
988 
989 	rx_grp = &vnetp->rx_grp[0];
990 	rx_grp->max_ring_cnt = MAX_RINGS_PER_GROUP;
991 	rw_init(&rx_grp->lock, NULL, RW_DRIVER, NULL);
992 	rx_ringp = kmem_zalloc(sizeof (vnet_pseudo_rx_ring_t) *
993 	    rx_grp->max_ring_cnt, KM_SLEEP);
994 
995 	/*
996 	 * Setup the first 3 Pseudo RX Rings that are reserved;
997 	 * 1 for LDC resource to vswitch + 2 for RX rings of Hybrid resource.
998 	 */
999 	rx_ringp[0].state |= VNET_RXRING_INUSE|VNET_RXRING_LDC_SERVICE;
1000 	rx_ringp[0].index = 0;
1001 	rx_ringp[1].state |= VNET_RXRING_INUSE|VNET_RXRING_HYBRID;
1002 	rx_ringp[1].index = 1;
1003 	rx_ringp[2].state |= VNET_RXRING_INUSE|VNET_RXRING_HYBRID;
1004 	rx_ringp[2].index = 2;
1005 
1006 	rx_grp->ring_cnt = VNET_NUM_PSEUDO_RXRINGS_DEFAULT;
1007 	rx_grp->rings = rx_ringp;
1008 
1009 	for (i = VNET_NUM_PSEUDO_RXRINGS_DEFAULT;
1010 	    i < rx_grp->max_ring_cnt; i++) {
1011 		rx_ringp = &rx_grp->rings[i];
1012 		rx_ringp->state = VNET_RXRING_FREE;
1013 		rx_ringp->index = i;
1014 	}
1015 }
1016 
1017 static void
1018 vnet_ring_grp_uninit(vnet_t *vnetp)
1019 {
1020 	vnet_pseudo_rx_group_t	*rx_grp;
1021 	vnet_pseudo_tx_group_t	*tx_grp;
1022 	kt_did_t		tid = 0;
1023 
1024 	tx_grp = &vnetp->tx_grp[0];
1025 
1026 	/* Inform tx_notify_thread to exit */
1027 	mutex_enter(&tx_grp->flowctl_lock);
1028 	if (tx_grp->flowctl_thread != NULL) {
1029 		tid = tx_grp->flowctl_thread->t_did;
1030 		tx_grp->flowctl_done = B_TRUE;
1031 		cv_signal(&tx_grp->flowctl_cv);
1032 	}
1033 	mutex_exit(&tx_grp->flowctl_lock);
1034 	if (tid != 0)
1035 		thread_join(tid);
1036 
1037 	if (tx_grp->rings != NULL) {
1038 		ASSERT(tx_grp->ring_cnt == VNET_NUM_PSEUDO_TXRINGS);
1039 		kmem_free(tx_grp->rings, sizeof (vnet_pseudo_tx_ring_t) *
1040 		    tx_grp->ring_cnt);
1041 		tx_grp->rings = NULL;
1042 	}
1043 
1044 	rx_grp = &vnetp->rx_grp[0];
1045 	if (rx_grp->rings != NULL) {
1046 		ASSERT(rx_grp->max_ring_cnt == MAX_RINGS_PER_GROUP);
1047 		ASSERT(rx_grp->ring_cnt == VNET_NUM_PSEUDO_RXRINGS_DEFAULT);
1048 		kmem_free(rx_grp->rings, sizeof (vnet_pseudo_rx_ring_t) *
1049 		    rx_grp->max_ring_cnt);
1050 		rx_grp->rings = NULL;
1051 	}
1052 }
1053 
1054 static vnet_pseudo_rx_ring_t *
1055 vnet_alloc_pseudo_rx_ring(vnet_t *vnetp)
1056 {
1057 	vnet_pseudo_rx_group_t  *rx_grp;
1058 	vnet_pseudo_rx_ring_t	*rx_ringp;
1059 	int			index;
1060 
1061 	rx_grp = &vnetp->rx_grp[0];
1062 	WRITE_ENTER(&rx_grp->lock);
1063 
1064 	if (rx_grp->ring_cnt == rx_grp->max_ring_cnt) {
1065 		/* no rings available */
1066 		RW_EXIT(&rx_grp->lock);
1067 		return (NULL);
1068 	}
1069 
1070 	for (index = VNET_NUM_PSEUDO_RXRINGS_DEFAULT;
1071 	    index < rx_grp->max_ring_cnt; index++) {
1072 		rx_ringp = &rx_grp->rings[index];
1073 		if (rx_ringp->state == VNET_RXRING_FREE) {
1074 			rx_ringp->state |= VNET_RXRING_INUSE;
1075 			rx_grp->ring_cnt++;
1076 			break;
1077 		}
1078 	}
1079 
1080 	RW_EXIT(&rx_grp->lock);
1081 	return (rx_ringp);
1082 }
1083 
1084 static void
1085 vnet_free_pseudo_rx_ring(vnet_t *vnetp, vnet_pseudo_rx_ring_t *ringp)
1086 {
1087 	vnet_pseudo_rx_group_t  *rx_grp;
1088 
1089 	ASSERT(ringp->index >= VNET_NUM_PSEUDO_RXRINGS_DEFAULT);
1090 	rx_grp = &vnetp->rx_grp[0];
1091 	WRITE_ENTER(&rx_grp->lock);
1092 
1093 	if (ringp->state != VNET_RXRING_FREE) {
1094 		ringp->state = VNET_RXRING_FREE;
1095 		ringp->handle = NULL;
1096 		rx_grp->ring_cnt--;
1097 	}
1098 
1099 	RW_EXIT(&rx_grp->lock);
1100 }
1101 
1102 /* wrapper function for mac_register() */
1103 static int
1104 vnet_mac_register(vnet_t *vnetp)
1105 {
1106 	mac_register_t	*macp;
1107 	int		err;
1108 
1109 	if ((macp = mac_alloc(MAC_VERSION)) == NULL)
1110 		return (DDI_FAILURE);
1111 	macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
1112 	macp->m_driver = vnetp;
1113 	macp->m_dip = vnetp->dip;
1114 	macp->m_src_addr = vnetp->curr_macaddr;
1115 	macp->m_callbacks = &vnet_m_callbacks;
1116 	macp->m_min_sdu = 0;
1117 	macp->m_max_sdu = vnetp->mtu;
1118 	macp->m_margin = VLAN_TAGSZ;
1119 
1120 	macp->m_v12n = MAC_VIRT_LEVEL1;
1121 
1122 	/*
1123 	 * Finally, we're ready to register ourselves with the MAC layer
1124 	 * interface; if this succeeds, we're all ready to start()
1125 	 */
1126 	err = mac_register(macp, &vnetp->mh);
1127 	mac_free(macp);
1128 	return (err == 0 ? DDI_SUCCESS : DDI_FAILURE);
1129 }
1130 
1131 /* read the mac address of the device */
1132 static int
1133 vnet_read_mac_address(vnet_t *vnetp)
1134 {
1135 	uchar_t 	*macaddr;
1136 	uint32_t 	size;
1137 	int 		rv;
1138 
1139 	rv = ddi_prop_lookup_byte_array(DDI_DEV_T_ANY, vnetp->dip,
1140 	    DDI_PROP_DONTPASS, macaddr_propname, &macaddr, &size);
1141 	if ((rv != DDI_PROP_SUCCESS) || (size != ETHERADDRL)) {
1142 		DWARN(vnetp, "prop_lookup failed(%s) err(%d)\n",
1143 		    macaddr_propname, rv);
1144 		return (DDI_FAILURE);
1145 	}
1146 	bcopy(macaddr, (caddr_t)vnetp->vendor_addr, ETHERADDRL);
1147 	bcopy(macaddr, (caddr_t)vnetp->curr_macaddr, ETHERADDRL);
1148 	ddi_prop_free(macaddr);
1149 
1150 	return (DDI_SUCCESS);
1151 }
1152 
1153 static void
1154 vnet_fdb_create(vnet_t *vnetp)
1155 {
1156 	char		hashname[MAXNAMELEN];
1157 
1158 	(void) snprintf(hashname, MAXNAMELEN, "vnet%d-fdbhash",
1159 	    vnetp->instance);
1160 	vnetp->fdb_nchains = vnet_fdb_nchains;
1161 	vnetp->fdb_hashp = mod_hash_create_ptrhash(hashname, vnetp->fdb_nchains,
1162 	    mod_hash_null_valdtor, sizeof (void *));
1163 }
1164 
1165 static void
1166 vnet_fdb_destroy(vnet_t *vnetp)
1167 {
1168 	/* destroy fdb-hash-table */
1169 	if (vnetp->fdb_hashp != NULL) {
1170 		mod_hash_destroy_hash(vnetp->fdb_hashp);
1171 		vnetp->fdb_hashp = NULL;
1172 		vnetp->fdb_nchains = 0;
1173 	}
1174 }
1175 
1176 /*
1177  * Add an entry into the fdb.
1178  */
1179 void
1180 vnet_fdbe_add(vnet_t *vnetp, vnet_res_t *vresp)
1181 {
1182 	uint64_t	addr = 0;
1183 	int		rv;
1184 
1185 	KEY_HASH(addr, vresp->rem_macaddr);
1186 
1187 	/*
1188 	 * If the entry being added corresponds to LDC_SERVICE resource,
1189 	 * that is, vswitch connection, it is added to the hash and also
1190 	 * the entry is cached, an additional reference count reflects
1191 	 * this. The HYBRID resource is not added to the hash, but only
1192 	 * cached, as it is only used for sending out packets for unknown
1193 	 * unicast destinations.
1194 	 */
1195 	(vresp->type == VIO_NET_RES_LDC_SERVICE) ?
1196 	    (vresp->refcnt = 1) : (vresp->refcnt = 0);
1197 
1198 	/*
1199 	 * Note: duplicate keys will be rejected by mod_hash.
1200 	 */
1201 	if (vresp->type != VIO_NET_RES_HYBRID) {
1202 		rv = mod_hash_insert(vnetp->fdb_hashp, (mod_hash_key_t)addr,
1203 		    (mod_hash_val_t)vresp);
1204 		if (rv != 0) {
1205 			DWARN(vnetp, "Duplicate macaddr key(%lx)\n", addr);
1206 			return;
1207 		}
1208 	}
1209 
1210 	if (vresp->type == VIO_NET_RES_LDC_SERVICE) {
1211 		/* Cache the fdb entry to vsw-port */
1212 		WRITE_ENTER(&vnetp->vsw_fp_rw);
1213 		if (vnetp->vsw_fp == NULL)
1214 			vnetp->vsw_fp = vresp;
1215 		RW_EXIT(&vnetp->vsw_fp_rw);
1216 	} else if (vresp->type == VIO_NET_RES_HYBRID) {
1217 		/* Cache the fdb entry to hybrid resource */
1218 		WRITE_ENTER(&vnetp->vsw_fp_rw);
1219 		if (vnetp->hio_fp == NULL)
1220 			vnetp->hio_fp = vresp;
1221 		RW_EXIT(&vnetp->vsw_fp_rw);
1222 	}
1223 }
1224 
1225 /*
1226  * Remove an entry from fdb.
1227  */
1228 static void
1229 vnet_fdbe_del(vnet_t *vnetp, vnet_res_t *vresp)
1230 {
1231 	uint64_t	addr = 0;
1232 	int		rv;
1233 	uint32_t	refcnt;
1234 	vnet_res_t	*tmp;
1235 
1236 	KEY_HASH(addr, vresp->rem_macaddr);
1237 
1238 	/*
1239 	 * Remove the entry from fdb hash table.
1240 	 * This prevents further references to this fdb entry.
1241 	 */
1242 	if (vresp->type != VIO_NET_RES_HYBRID) {
1243 		rv = mod_hash_remove(vnetp->fdb_hashp, (mod_hash_key_t)addr,
1244 		    (mod_hash_val_t *)&tmp);
1245 		if (rv != 0) {
1246 			/*
1247 			 * As the resources are added to the hash only
1248 			 * after they are started, this can occur if
1249 			 * a resource unregisters before it is ever started.
1250 			 */
1251 			return;
1252 		}
1253 	}
1254 
1255 	if (vresp->type == VIO_NET_RES_LDC_SERVICE) {
1256 		WRITE_ENTER(&vnetp->vsw_fp_rw);
1257 
1258 		ASSERT(tmp == vnetp->vsw_fp);
1259 		vnetp->vsw_fp = NULL;
1260 
1261 		RW_EXIT(&vnetp->vsw_fp_rw);
1262 	} else if (vresp->type == VIO_NET_RES_HYBRID) {
1263 		WRITE_ENTER(&vnetp->vsw_fp_rw);
1264 
1265 		vnetp->hio_fp = NULL;
1266 
1267 		RW_EXIT(&vnetp->vsw_fp_rw);
1268 	}
1269 
1270 	/*
1271 	 * If there are threads already ref holding before the entry was
1272 	 * removed from hash table, then wait for ref count to drop to zero.
1273 	 */
1274 	(vresp->type == VIO_NET_RES_LDC_SERVICE) ?
1275 	    (refcnt = 1) : (refcnt = 0);
1276 	while (vresp->refcnt > refcnt) {
1277 		delay(drv_usectohz(vnet_fdbe_refcnt_delay));
1278 	}
1279 }
1280 
1281 /*
1282  * Search fdb for a given mac address. If an entry is found, hold
1283  * a reference to it and return the entry; else returns NULL.
1284  */
1285 static vnet_res_t *
1286 vnet_fdbe_find(vnet_t *vnetp, struct ether_addr *addrp)
1287 {
1288 	uint64_t	key = 0;
1289 	vnet_res_t	*vresp;
1290 	int		rv;
1291 
1292 	KEY_HASH(key, addrp->ether_addr_octet);
1293 
1294 	rv = mod_hash_find_cb(vnetp->fdb_hashp, (mod_hash_key_t)key,
1295 	    (mod_hash_val_t *)&vresp, vnet_fdbe_find_cb);
1296 
1297 	if (rv != 0)
1298 		return (NULL);
1299 
1300 	return (vresp);
1301 }
1302 
1303 /*
1304  * Callback function provided to mod_hash_find_cb(). After finding the fdb
1305  * entry corresponding to the key (macaddr), this callback will be invoked by
1306  * mod_hash_find_cb() to atomically increment the reference count on the fdb
1307  * entry before returning the found entry.
1308  */
1309 static void
1310 vnet_fdbe_find_cb(mod_hash_key_t key, mod_hash_val_t val)
1311 {
1312 	_NOTE(ARGUNUSED(key))
1313 	VNET_FDBE_REFHOLD((vnet_res_t *)val);
1314 }
1315 
1316 /*
1317  * Frames received that are tagged with the pvid of the vnet device must be
1318  * untagged before sending up the stack. This function walks the chain of rx
1319  * frames, untags any such frames and returns the updated chain.
1320  *
1321  * Arguments:
1322  *    pvid:  pvid of the vnet device for which packets are being received
1323  *    mp:    head of pkt chain to be validated and untagged
1324  *
1325  * Returns:
1326  *    mp:    head of updated chain of packets
1327  */
1328 static void
1329 vnet_rx_frames_untag(uint16_t pvid, mblk_t **mp)
1330 {
1331 	struct ether_vlan_header	*evhp;
1332 	mblk_t				*bp;
1333 	mblk_t				*bpt;
1334 	mblk_t				*bph;
1335 	mblk_t				*bpn;
1336 
1337 	bpn = bph = bpt = NULL;
1338 
1339 	for (bp = *mp; bp != NULL; bp = bpn) {
1340 
1341 		bpn = bp->b_next;
1342 		bp->b_next = bp->b_prev = NULL;
1343 
1344 		evhp = (struct ether_vlan_header *)bp->b_rptr;
1345 
1346 		if (ntohs(evhp->ether_tpid) == ETHERTYPE_VLAN &&
1347 		    VLAN_ID(ntohs(evhp->ether_tci)) == pvid) {
1348 
1349 			bp = vnet_vlan_remove_tag(bp);
1350 			if (bp == NULL) {
1351 				continue;
1352 			}
1353 
1354 		}
1355 
1356 		/* build a chain of processed packets */
1357 		if (bph == NULL) {
1358 			bph = bpt = bp;
1359 		} else {
1360 			bpt->b_next = bp;
1361 			bpt = bp;
1362 		}
1363 
1364 	}
1365 
1366 	*mp = bph;
1367 }
1368 
1369 static void
1370 vnet_rx(vio_net_handle_t vrh, mblk_t *mp)
1371 {
1372 	vnet_res_t		*vresp = (vnet_res_t *)vrh;
1373 	vnet_t			*vnetp = vresp->vnetp;
1374 	vnet_pseudo_rx_ring_t	*ringp;
1375 
1376 	if ((vnetp == NULL) || (vnetp->mh == 0)) {
1377 		freemsgchain(mp);
1378 		return;
1379 	}
1380 
1381 	ringp = vresp->rx_ringp;
1382 	mac_rx_ring(vnetp->mh, ringp->handle, mp, ringp->gen_num);
1383 }
1384 
1385 void
1386 vnet_tx_update(vio_net_handle_t vrh)
1387 {
1388 	vnet_res_t		*vresp = (vnet_res_t *)vrh;
1389 	vnet_t			*vnetp = vresp->vnetp;
1390 	vnet_pseudo_tx_ring_t	*tx_ringp;
1391 	vnet_pseudo_tx_group_t	*tx_grp;
1392 	int			i;
1393 
1394 	if (vnetp == NULL || vnetp->mh == NULL) {
1395 		return;
1396 	}
1397 
1398 	/*
1399 	 * Currently, the tx hwring API (used to access rings that belong to
1400 	 * a Hybrid IO resource) does not provide us a per ring flow ctrl
1401 	 * update; also the pseudo rings are shared by the ports/ldcs in the
1402 	 * vgen layer. Thus we can't figure out which pseudo ring is being
1403 	 * re-enabled for transmits. To work around this, when we get a tx
1404 	 * restart notification from below, we simply propagate that to all
1405 	 * the tx pseudo rings registered with the mac layer above.
1406 	 *
1407 	 * There are a couple of side effects with this approach, but they are
1408 	 * not harmful, as outlined below:
1409 	 *
1410 	 * A) We might send an invalid ring_update() for a ring that is not
1411 	 * really flow controlled. This will not have any effect in the mac
1412 	 * layer and packets will continue to be transmitted on that ring.
1413 	 *
1414 	 * B) We might end up clearing the flow control in the mac layer for
1415 	 * a ring that is still flow controlled in the underlying resource.
1416 	 * This will result in the mac layer restarting	transmit, only to be
1417 	 * flow controlled again on that ring.
1418 	 */
1419 	tx_grp = &vnetp->tx_grp[0];
1420 	for (i = 0; i < tx_grp->ring_cnt; i++) {
1421 		tx_ringp = &tx_grp->rings[i];
1422 		mac_tx_ring_update(vnetp->mh, tx_ringp->handle);
1423 	}
1424 }
1425 
1426 /*
1427  * vnet_tx_notify_thread:
1428  *
1429  * vnet_tx_ring_update() callback function wakes up this thread when
1430  * it gets called. This thread will call mac_tx_ring_update() to
1431  * notify upper mac of flow control getting relieved. Note that
1432  * vnet_tx_ring_update() cannot call mac_tx_ring_update() directly
1433  * because vnet_tx_ring_update() is called from lower mac with
1434  * mi_rw_lock held and mac_tx_ring_update() would also try to grab
1435  * the same lock.
1436  */
1437 static void
1438 vnet_tx_notify_thread(void *arg)
1439 {
1440 	callb_cpr_t		cprinfo;
1441 	vnet_pseudo_tx_group_t	*tx_grp = (vnet_pseudo_tx_group_t *)arg;
1442 	vnet_pseudo_tx_ring_t	*tx_ringp;
1443 	vnet_t			*vnetp;
1444 	int			i;
1445 
1446 	CALLB_CPR_INIT(&cprinfo, &tx_grp->flowctl_lock, callb_generic_cpr,
1447 	    "vnet_tx_notify_thread");
1448 
1449 	mutex_enter(&tx_grp->flowctl_lock);
1450 	while (!tx_grp->flowctl_done) {
1451 		CALLB_CPR_SAFE_BEGIN(&cprinfo);
1452 		cv_wait(&tx_grp->flowctl_cv, &tx_grp->flowctl_lock);
1453 		CALLB_CPR_SAFE_END(&cprinfo, &tx_grp->flowctl_lock);
1454 
1455 		for (i = 0; i < tx_grp->ring_cnt; i++) {
1456 			tx_ringp = &tx_grp->rings[i];
1457 			if (tx_ringp->woken_up) {
1458 				tx_ringp->woken_up = B_FALSE;
1459 				vnetp = tx_ringp->vnetp;
1460 				mac_tx_ring_update(vnetp->mh, tx_ringp->handle);
1461 			}
1462 		}
1463 	}
1464 	/*
1465 	 * The tx_grp is being destroyed, exit the thread.
1466 	 */
1467 	tx_grp->flowctl_thread = NULL;
1468 	CALLB_CPR_EXIT(&cprinfo);
1469 	thread_exit();
1470 }
1471 
1472 void
1473 vnet_tx_ring_update(void *arg1, uintptr_t arg2)
1474 {
1475 	vnet_t			*vnetp = (vnet_t *)arg1;
1476 	vnet_pseudo_tx_group_t	*tx_grp;
1477 	vnet_pseudo_tx_ring_t	*tx_ringp;
1478 	int			i;
1479 
1480 	tx_grp = &vnetp->tx_grp[0];
1481 	for (i = 0; i < tx_grp->ring_cnt; i++) {
1482 		tx_ringp = &tx_grp->rings[i];
1483 		if (tx_ringp->hw_rh == (mac_ring_handle_t)arg2) {
1484 			mutex_enter(&tx_grp->flowctl_lock);
1485 			tx_ringp->woken_up = B_TRUE;
1486 			cv_signal(&tx_grp->flowctl_cv);
1487 			mutex_exit(&tx_grp->flowctl_lock);
1488 			break;
1489 		}
1490 	}
1491 }
1492 
1493 /*
1494  * Update the new mtu of vnet into the mac layer. First check if the device has
1495  * been plumbed and if so fail the mtu update. Returns 0 on success.
1496  */
1497 int
1498 vnet_mtu_update(vnet_t *vnetp, uint32_t mtu)
1499 {
1500 	int	rv;
1501 
1502 	if (vnetp == NULL || vnetp->mh == NULL) {
1503 		return (EINVAL);
1504 	}
1505 
1506 	WRITE_ENTER(&vnetp->vrwlock);
1507 
1508 	if (vnetp->flags & VNET_STARTED) {
1509 		RW_EXIT(&vnetp->vrwlock);
1510 		cmn_err(CE_NOTE, "!vnet%d: Unable to process mtu "
1511 		    "update as the device is plumbed\n",
1512 		    vnetp->instance);
1513 		return (EBUSY);
1514 	}
1515 
1516 	/* update mtu in the mac layer */
1517 	rv = mac_maxsdu_update(vnetp->mh, mtu);
1518 	if (rv != 0) {
1519 		RW_EXIT(&vnetp->vrwlock);
1520 		cmn_err(CE_NOTE,
1521 		    "!vnet%d: Unable to update mtu with mac layer\n",
1522 		    vnetp->instance);
1523 		return (EIO);
1524 	}
1525 
1526 	vnetp->mtu = mtu;
1527 
1528 	RW_EXIT(&vnetp->vrwlock);
1529 
1530 	return (0);
1531 }
1532 
1533 /*
1534  * Update the link state of vnet to the mac layer.
1535  */
1536 void
1537 vnet_link_update(vnet_t *vnetp, link_state_t link_state)
1538 {
1539 	if (vnetp == NULL || vnetp->mh == NULL) {
1540 		return;
1541 	}
1542 
1543 	WRITE_ENTER(&vnetp->vrwlock);
1544 	if (vnetp->link_state == link_state) {
1545 		RW_EXIT(&vnetp->vrwlock);
1546 		return;
1547 	}
1548 	vnetp->link_state = link_state;
1549 	RW_EXIT(&vnetp->vrwlock);
1550 
1551 	mac_link_update(vnetp->mh, link_state);
1552 }
1553 
1554 /*
1555  * vio_net_resource_reg -- An interface called to register a resource
1556  *	with vnet.
1557  *	macp -- a GLDv3 mac_register that has all the details of
1558  *		a resource and its callbacks etc.
1559  *	type -- resource type.
1560  *	local_macaddr -- resource's MAC address. This is used to
1561  *			 associate a resource with a corresponding vnet.
1562  *	remote_macaddr -- remote side MAC address. This is ignored for
1563  *			  the Hybrid resources.
1564  *	vhp -- A handle returned to the caller.
1565  *	vcb -- A set of callbacks provided to the callers.
1566  */
1567 int vio_net_resource_reg(mac_register_t *macp, vio_net_res_type_t type,
1568     ether_addr_t local_macaddr, ether_addr_t rem_macaddr, vio_net_handle_t *vhp,
1569     vio_net_callbacks_t *vcb)
1570 {
1571 	vnet_t		*vnetp;
1572 	vnet_res_t	*vresp;
1573 
1574 	vresp = kmem_zalloc(sizeof (vnet_res_t), KM_SLEEP);
1575 	ether_copy(local_macaddr, vresp->local_macaddr);
1576 	ether_copy(rem_macaddr, vresp->rem_macaddr);
1577 	vresp->type = type;
1578 	bcopy(macp, &vresp->macreg, sizeof (mac_register_t));
1579 
1580 	DBG1(NULL, "Resource Registerig type=0%X\n", type);
1581 
1582 	READ_ENTER(&vnet_rw);
1583 	vnetp = vnet_headp;
1584 	while (vnetp != NULL) {
1585 		if (VNET_MATCH_RES(vresp, vnetp)) {
1586 			vresp->vnetp = vnetp;
1587 
1588 			/* Setup kstats for hio resource */
1589 			if (vresp->type == VIO_NET_RES_HYBRID) {
1590 				vresp->ksp = vnet_hio_setup_kstats(DRV_NAME,
1591 				    "hio", vresp);
1592 				if (vresp->ksp == NULL) {
1593 					cmn_err(CE_NOTE, "!vnet%d: Cannot "
1594 					    "create kstats for hio resource",
1595 					    vnetp->instance);
1596 				}
1597 			}
1598 			vnet_add_resource(vnetp, vresp);
1599 			break;
1600 		}
1601 		vnetp = vnetp->nextp;
1602 	}
1603 	RW_EXIT(&vnet_rw);
1604 	if (vresp->vnetp == NULL) {
1605 		DWARN(NULL, "No vnet instance");
1606 		kmem_free(vresp, sizeof (vnet_res_t));
1607 		return (ENXIO);
1608 	}
1609 
1610 	*vhp = vresp;
1611 	vcb->vio_net_rx_cb = vnet_rx;
1612 	vcb->vio_net_tx_update = vnet_tx_update;
1613 	vcb->vio_net_report_err = vnet_handle_res_err;
1614 
1615 	/* Bind the resource to pseudo ring(s) */
1616 	if (vnet_bind_rings(vresp) != 0) {
1617 		(void) vnet_rem_resource(vnetp, vresp);
1618 		vnet_hio_destroy_kstats(vresp->ksp);
1619 		KMEM_FREE(vresp);
1620 		return (1);
1621 	}
1622 
1623 	/* Dispatch a task to start resources */
1624 	vnet_dispatch_res_task(vnetp);
1625 	return (0);
1626 }
1627 
1628 /*
1629  * vio_net_resource_unreg -- An interface to unregister a resource.
1630  */
1631 void
1632 vio_net_resource_unreg(vio_net_handle_t vhp)
1633 {
1634 	vnet_res_t	*vresp = (vnet_res_t *)vhp;
1635 	vnet_t		*vnetp = vresp->vnetp;
1636 
1637 	DBG1(NULL, "Resource Registerig hdl=0x%p", vhp);
1638 
1639 	ASSERT(vnetp != NULL);
1640 	/*
1641 	 * Remove the resource from fdb; this ensures
1642 	 * there are no references to the resource.
1643 	 */
1644 	vnet_fdbe_del(vnetp, vresp);
1645 
1646 	vnet_unbind_rings(vresp);
1647 
1648 	/* Now remove the resource from the list */
1649 	(void) vnet_rem_resource(vnetp, vresp);
1650 
1651 	vnet_hio_destroy_kstats(vresp->ksp);
1652 	KMEM_FREE(vresp);
1653 }
1654 
1655 static void
1656 vnet_add_resource(vnet_t *vnetp, vnet_res_t *vresp)
1657 {
1658 	WRITE_ENTER(&vnetp->vrwlock);
1659 	vresp->nextp = vnetp->vres_list;
1660 	vnetp->vres_list = vresp;
1661 	RW_EXIT(&vnetp->vrwlock);
1662 }
1663 
1664 static vnet_res_t *
1665 vnet_rem_resource(vnet_t *vnetp, vnet_res_t *vresp)
1666 {
1667 	vnet_res_t	*vrp;
1668 
1669 	WRITE_ENTER(&vnetp->vrwlock);
1670 	if (vresp == vnetp->vres_list) {
1671 		vnetp->vres_list = vresp->nextp;
1672 	} else {
1673 		vrp = vnetp->vres_list;
1674 		while (vrp->nextp != NULL) {
1675 			if (vrp->nextp == vresp) {
1676 				vrp->nextp = vresp->nextp;
1677 				break;
1678 			}
1679 			vrp = vrp->nextp;
1680 		}
1681 	}
1682 	vresp->vnetp = NULL;
1683 	vresp->nextp = NULL;
1684 
1685 	RW_EXIT(&vnetp->vrwlock);
1686 
1687 	return (vresp);
1688 }
1689 
1690 /*
1691  * vnet_dds_rx -- an interface called by vgen to DDS messages.
1692  */
1693 void
1694 vnet_dds_rx(void *arg, void *dmsg)
1695 {
1696 	vnet_t *vnetp = arg;
1697 	vdds_process_dds_msg(vnetp, dmsg);
1698 }
1699 
1700 /*
1701  * vnet_send_dds_msg -- An interface provided to DDS to send
1702  *	DDS messages. This simply sends meessages via vgen.
1703  */
1704 int
1705 vnet_send_dds_msg(vnet_t *vnetp, void *dmsg)
1706 {
1707 	int rv;
1708 
1709 	if (vnetp->vgenhdl != NULL) {
1710 		rv = vgen_dds_tx(vnetp->vgenhdl, dmsg);
1711 	}
1712 	return (rv);
1713 }
1714 
1715 /*
1716  * vnet_cleanup_hio -- an interface called by vgen to cleanup hio resources.
1717  */
1718 void
1719 vnet_dds_cleanup_hio(vnet_t *vnetp)
1720 {
1721 	vdds_cleanup_hio(vnetp);
1722 }
1723 
1724 /*
1725  * vnet_handle_res_err -- A callback function called by a resource
1726  *	to report an error. For example, vgen can call to report
1727  *	an LDC down/reset event. This will trigger cleanup of associated
1728  *	Hybrid resource.
1729  */
1730 /* ARGSUSED */
1731 static void
1732 vnet_handle_res_err(vio_net_handle_t vrh, vio_net_err_val_t err)
1733 {
1734 	vnet_res_t *vresp = (vnet_res_t *)vrh;
1735 	vnet_t *vnetp = vresp->vnetp;
1736 
1737 	if (vnetp == NULL) {
1738 		return;
1739 	}
1740 	if ((vresp->type != VIO_NET_RES_LDC_SERVICE) &&
1741 	    (vresp->type != VIO_NET_RES_HYBRID)) {
1742 		return;
1743 	}
1744 
1745 	vdds_cleanup_hio(vnetp);
1746 }
1747 
1748 /*
1749  * vnet_dispatch_res_task -- A function to dispatch tasks start resources.
1750  */
1751 static void
1752 vnet_dispatch_res_task(vnet_t *vnetp)
1753 {
1754 	int rv;
1755 
1756 	/*
1757 	 * Dispatch the task. It could be the case that vnetp->flags does
1758 	 * not have VNET_STARTED set. This is ok as vnet_rest_start_task()
1759 	 * can abort the task when the task is started. See related comments
1760 	 * in vnet_m_stop() and vnet_stop_resources().
1761 	 */
1762 	rv = ddi_taskq_dispatch(vnetp->taskqp, vnet_res_start_task,
1763 	    vnetp, DDI_NOSLEEP);
1764 	if (rv != DDI_SUCCESS) {
1765 		cmn_err(CE_WARN,
1766 		    "vnet%d:Can't dispatch start resource task",
1767 		    vnetp->instance);
1768 	}
1769 }
1770 
1771 /*
1772  * vnet_res_start_task -- A taskq callback function that starts a resource.
1773  */
1774 static void
1775 vnet_res_start_task(void *arg)
1776 {
1777 	vnet_t *vnetp = arg;
1778 
1779 	WRITE_ENTER(&vnetp->vrwlock);
1780 	if (vnetp->flags & VNET_STARTED) {
1781 		vnet_start_resources(vnetp);
1782 	}
1783 	RW_EXIT(&vnetp->vrwlock);
1784 }
1785 
1786 /*
1787  * vnet_start_resources -- starts all resources associated with
1788  *	a vnet.
1789  */
1790 static void
1791 vnet_start_resources(vnet_t *vnetp)
1792 {
1793 	mac_register_t	*macp;
1794 	mac_callbacks_t	*cbp;
1795 	vnet_res_t	*vresp;
1796 	int rv;
1797 
1798 	DBG1(vnetp, "enter\n");
1799 
1800 	ASSERT(RW_WRITE_HELD(&vnetp->vrwlock));
1801 
1802 	for (vresp = vnetp->vres_list; vresp != NULL; vresp = vresp->nextp) {
1803 		/* skip if it is already started */
1804 		if (vresp->flags & VNET_STARTED) {
1805 			continue;
1806 		}
1807 		macp = &vresp->macreg;
1808 		cbp = macp->m_callbacks;
1809 		rv = cbp->mc_start(macp->m_driver);
1810 		if (rv == 0) {
1811 			/*
1812 			 * Successfully started the resource, so now
1813 			 * add it to the fdb.
1814 			 */
1815 			vresp->flags |= VNET_STARTED;
1816 			vnet_fdbe_add(vnetp, vresp);
1817 		}
1818 	}
1819 
1820 	DBG1(vnetp, "exit\n");
1821 
1822 }
1823 
1824 /*
1825  * vnet_stop_resources -- stop all resources associated with a vnet.
1826  */
1827 static void
1828 vnet_stop_resources(vnet_t *vnetp)
1829 {
1830 	vnet_res_t	*vresp;
1831 	mac_register_t	*macp;
1832 	mac_callbacks_t	*cbp;
1833 
1834 	DBG1(vnetp, "enter\n");
1835 
1836 	ASSERT(RW_WRITE_HELD(&vnetp->vrwlock));
1837 
1838 	for (vresp = vnetp->vres_list; vresp != NULL; ) {
1839 		if (vresp->flags & VNET_STARTED) {
1840 			/*
1841 			 * Release the lock while invoking mc_stop() of the
1842 			 * underlying resource. We hold a reference to this
1843 			 * resource to prevent being removed from the list in
1844 			 * vio_net_resource_unreg(). Note that new resources
1845 			 * can be added to the head of the list while the lock
1846 			 * is released, but they won't be started, as
1847 			 * VNET_STARTED flag has been cleared for the vnet
1848 			 * device in vnet_m_stop(). Also, while the lock is
1849 			 * released a resource could be removed from the list
1850 			 * in vio_net_resource_unreg(); but that is ok, as we
1851 			 * re-acquire the lock and only then access the forward
1852 			 * link (vresp->nextp) to continue with the next
1853 			 * resource.
1854 			 */
1855 			vresp->flags &= ~VNET_STARTED;
1856 			vresp->flags |= VNET_STOPPING;
1857 			macp = &vresp->macreg;
1858 			cbp = macp->m_callbacks;
1859 			VNET_FDBE_REFHOLD(vresp);
1860 			RW_EXIT(&vnetp->vrwlock);
1861 
1862 			cbp->mc_stop(macp->m_driver);
1863 
1864 			WRITE_ENTER(&vnetp->vrwlock);
1865 			vresp->flags &= ~VNET_STOPPING;
1866 			VNET_FDBE_REFRELE(vresp);
1867 		}
1868 		vresp = vresp->nextp;
1869 	}
1870 	DBG1(vnetp, "exit\n");
1871 }
1872 
1873 /*
1874  * Setup kstats for the HIO statistics.
1875  * NOTE: the synchronization for the statistics is the
1876  * responsibility of the caller.
1877  */
1878 kstat_t *
1879 vnet_hio_setup_kstats(char *ks_mod, char *ks_name, vnet_res_t *vresp)
1880 {
1881 	kstat_t *ksp;
1882 	vnet_t *vnetp = vresp->vnetp;
1883 	vnet_hio_kstats_t *hiokp;
1884 	size_t size;
1885 
1886 	ASSERT(vnetp != NULL);
1887 	size = sizeof (vnet_hio_kstats_t) / sizeof (kstat_named_t);
1888 	ksp = kstat_create(ks_mod, vnetp->instance, ks_name, "net",
1889 	    KSTAT_TYPE_NAMED, size, 0);
1890 	if (ksp == NULL) {
1891 		return (NULL);
1892 	}
1893 
1894 	hiokp = (vnet_hio_kstats_t *)ksp->ks_data;
1895 	kstat_named_init(&hiokp->ipackets,		"ipackets",
1896 	    KSTAT_DATA_ULONG);
1897 	kstat_named_init(&hiokp->ierrors,		"ierrors",
1898 	    KSTAT_DATA_ULONG);
1899 	kstat_named_init(&hiokp->opackets,		"opackets",
1900 	    KSTAT_DATA_ULONG);
1901 	kstat_named_init(&hiokp->oerrors,		"oerrors",
1902 	    KSTAT_DATA_ULONG);
1903 
1904 
1905 	/* MIB II kstat variables */
1906 	kstat_named_init(&hiokp->rbytes,		"rbytes",
1907 	    KSTAT_DATA_ULONG);
1908 	kstat_named_init(&hiokp->obytes,		"obytes",
1909 	    KSTAT_DATA_ULONG);
1910 	kstat_named_init(&hiokp->multircv,		"multircv",
1911 	    KSTAT_DATA_ULONG);
1912 	kstat_named_init(&hiokp->multixmt,		"multixmt",
1913 	    KSTAT_DATA_ULONG);
1914 	kstat_named_init(&hiokp->brdcstrcv,		"brdcstrcv",
1915 	    KSTAT_DATA_ULONG);
1916 	kstat_named_init(&hiokp->brdcstxmt,		"brdcstxmt",
1917 	    KSTAT_DATA_ULONG);
1918 	kstat_named_init(&hiokp->norcvbuf,		"norcvbuf",
1919 	    KSTAT_DATA_ULONG);
1920 	kstat_named_init(&hiokp->noxmtbuf,		"noxmtbuf",
1921 	    KSTAT_DATA_ULONG);
1922 
1923 	ksp->ks_update = vnet_hio_update_kstats;
1924 	ksp->ks_private = (void *)vresp;
1925 	kstat_install(ksp);
1926 	return (ksp);
1927 }
1928 
1929 /*
1930  * Destroy kstats.
1931  */
1932 static void
1933 vnet_hio_destroy_kstats(kstat_t *ksp)
1934 {
1935 	if (ksp != NULL)
1936 		kstat_delete(ksp);
1937 }
1938 
1939 /*
1940  * Update the kstats.
1941  */
1942 static int
1943 vnet_hio_update_kstats(kstat_t *ksp, int rw)
1944 {
1945 	vnet_t *vnetp;
1946 	vnet_res_t *vresp;
1947 	vnet_hio_stats_t statsp;
1948 	vnet_hio_kstats_t *hiokp;
1949 
1950 	vresp = (vnet_res_t *)ksp->ks_private;
1951 	vnetp = vresp->vnetp;
1952 
1953 	bzero(&statsp, sizeof (vnet_hio_stats_t));
1954 
1955 	READ_ENTER(&vnetp->vsw_fp_rw);
1956 	if (vnetp->hio_fp == NULL) {
1957 		/* not using hio resources, just return */
1958 		RW_EXIT(&vnetp->vsw_fp_rw);
1959 		return (0);
1960 	}
1961 	VNET_FDBE_REFHOLD(vnetp->hio_fp);
1962 	RW_EXIT(&vnetp->vsw_fp_rw);
1963 	vnet_hio_get_stats(vnetp->hio_fp, &statsp);
1964 	VNET_FDBE_REFRELE(vnetp->hio_fp);
1965 
1966 	hiokp = (vnet_hio_kstats_t *)ksp->ks_data;
1967 
1968 	if (rw == KSTAT_READ) {
1969 		/* Link Input/Output stats */
1970 		hiokp->ipackets.value.ul	= (uint32_t)statsp.ipackets;
1971 		hiokp->ipackets64.value.ull	= statsp.ipackets;
1972 		hiokp->ierrors.value.ul		= statsp.ierrors;
1973 		hiokp->opackets.value.ul	= (uint32_t)statsp.opackets;
1974 		hiokp->opackets64.value.ull	= statsp.opackets;
1975 		hiokp->oerrors.value.ul		= statsp.oerrors;
1976 
1977 		/* MIB II kstat variables */
1978 		hiokp->rbytes.value.ul		= (uint32_t)statsp.rbytes;
1979 		hiokp->rbytes64.value.ull	= statsp.rbytes;
1980 		hiokp->obytes.value.ul		= (uint32_t)statsp.obytes;
1981 		hiokp->obytes64.value.ull	= statsp.obytes;
1982 		hiokp->multircv.value.ul	= statsp.multircv;
1983 		hiokp->multixmt.value.ul	= statsp.multixmt;
1984 		hiokp->brdcstrcv.value.ul	= statsp.brdcstrcv;
1985 		hiokp->brdcstxmt.value.ul	= statsp.brdcstxmt;
1986 		hiokp->norcvbuf.value.ul	= statsp.norcvbuf;
1987 		hiokp->noxmtbuf.value.ul	= statsp.noxmtbuf;
1988 	} else {
1989 		return (EACCES);
1990 	}
1991 
1992 	return (0);
1993 }
1994 
1995 static void
1996 vnet_hio_get_stats(vnet_res_t *vresp, vnet_hio_stats_t *statsp)
1997 {
1998 	mac_register_t		*macp;
1999 	mac_callbacks_t		*cbp;
2000 	uint64_t		val;
2001 	int			stat;
2002 
2003 	/*
2004 	 * get the specified statistics from the underlying nxge.
2005 	 */
2006 	macp = &vresp->macreg;
2007 	cbp = macp->m_callbacks;
2008 	for (stat = MAC_STAT_MIN; stat < MAC_STAT_OVERFLOWS; stat++) {
2009 		if (cbp->mc_getstat(macp->m_driver, stat, &val) == 0) {
2010 			switch (stat) {
2011 			case MAC_STAT_IPACKETS:
2012 				statsp->ipackets = val;
2013 				break;
2014 
2015 			case MAC_STAT_IERRORS:
2016 				statsp->ierrors = val;
2017 				break;
2018 
2019 			case MAC_STAT_OPACKETS:
2020 				statsp->opackets = val;
2021 				break;
2022 
2023 			case MAC_STAT_OERRORS:
2024 				statsp->oerrors = val;
2025 				break;
2026 
2027 			case MAC_STAT_RBYTES:
2028 				statsp->rbytes = val;
2029 				break;
2030 
2031 			case MAC_STAT_OBYTES:
2032 				statsp->obytes = val;
2033 				break;
2034 
2035 			case MAC_STAT_MULTIRCV:
2036 				statsp->multircv = val;
2037 				break;
2038 
2039 			case MAC_STAT_MULTIXMT:
2040 				statsp->multixmt = val;
2041 				break;
2042 
2043 			case MAC_STAT_BRDCSTRCV:
2044 				statsp->brdcstrcv = val;
2045 				break;
2046 
2047 			case MAC_STAT_BRDCSTXMT:
2048 				statsp->brdcstxmt = val;
2049 				break;
2050 
2051 			case MAC_STAT_NOXMTBUF:
2052 				statsp->noxmtbuf = val;
2053 				break;
2054 
2055 			case MAC_STAT_NORCVBUF:
2056 				statsp->norcvbuf = val;
2057 				break;
2058 
2059 			default:
2060 				/*
2061 				 * parameters not interested.
2062 				 */
2063 				break;
2064 			}
2065 		}
2066 	}
2067 }
2068 
2069 static boolean_t
2070 vnet_m_capab(void *arg, mac_capab_t cap, void *cap_data)
2071 {
2072 	vnet_t	*vnetp = (vnet_t *)arg;
2073 
2074 	if (vnetp == NULL) {
2075 		return (0);
2076 	}
2077 
2078 	switch (cap) {
2079 
2080 	case MAC_CAPAB_RINGS: {
2081 
2082 		mac_capab_rings_t *cap_rings = cap_data;
2083 		/*
2084 		 * Rings Capability Notes:
2085 		 * We advertise rings to make use of the rings framework in
2086 		 * gldv3 mac layer, to improve the performance. This is
2087 		 * specifically needed when a Hybrid resource (with multiple
2088 		 * tx/rx hardware rings) is assigned to a vnet device. We also
2089 		 * leverage this for the normal case when no Hybrid resource is
2090 		 * assigned.
2091 		 *
2092 		 * Ring Allocation:
2093 		 * - TX path:
2094 		 * We expose a pseudo ring group with 2 pseudo tx rings (as
2095 		 * currently HybridIO exports only 2 rings) In the normal case,
2096 		 * transmit traffic that comes down to the driver through the
2097 		 * mri_tx (vnet_tx_ring_send()) entry point goes through the
2098 		 * distributed switching algorithm in vnet and gets transmitted
2099 		 * over a port/LDC in the vgen layer to either the vswitch or a
2100 		 * peer vnet. If and when a Hybrid resource is assigned to the
2101 		 * vnet, we obtain the tx ring information of the Hybrid device
2102 		 * (nxge) and map the pseudo rings 1:1 to the 2 hw tx rings.
2103 		 * Traffic being sent over the Hybrid resource by the mac layer
2104 		 * gets spread across both hw rings, as they are mapped to the
2105 		 * 2 pseudo tx rings in vnet.
2106 		 *
2107 		 * - RX path:
2108 		 * We expose a pseudo ring group with 3 pseudo rx rings (static
2109 		 * rings) initially. The first (default) pseudo rx ring is
2110 		 * reserved for the resource that connects to the vswitch
2111 		 * service. The next 2 rings are reserved for a Hybrid resource
2112 		 * that may be assigned to the vnet device. If and when a
2113 		 * Hybrid resource is assigned to the vnet, we obtain the rx
2114 		 * ring information of the Hybrid device (nxge) and map these
2115 		 * pseudo rings 1:1 to the 2 hw rx rings. For each additional
2116 		 * resource that connects to a peer vnet, we dynamically
2117 		 * allocate a pseudo rx ring and map it to that resource, when
2118 		 * the resource gets added; and the pseudo rx ring is
2119 		 * dynamically registered with the upper mac layer. We do the
2120 		 * reverse and unregister the ring with the mac layer when
2121 		 * the resource gets removed.
2122 		 *
2123 		 * Synchronization notes:
2124 		 * We don't need any lock to protect members of ring structure,
2125 		 * specifically ringp->hw_rh, in either the TX or the RX ring,
2126 		 * as explained below.
2127 		 * - TX ring:
2128 		 * ring->hw_rh is initialized only when a Hybrid resource is
2129 		 * associated; and gets referenced only in vnet_hio_tx(). The
2130 		 * Hybrid resource itself is available in fdb only after tx
2131 		 * hwrings are found and mapped; i.e, in vio_net_resource_reg()
2132 		 * we call vnet_bind_rings() first and then call
2133 		 * vnet_start_resources() which adds an entry to fdb. For
2134 		 * traffic going over LDC resources, we don't reference
2135 		 * ring->hw_rh at all.
2136 		 * - RX ring:
2137 		 * For rings mapped to Hybrid resource ring->hw_rh is
2138 		 * initialized and only then do we add the rx callback for
2139 		 * the underlying Hybrid resource; we disable callbacks before
2140 		 * we unmap ring->hw_rh. For rings mapped to LDC resources, we
2141 		 * stop the rx callbacks (in vgen) before we remove ring->hw_rh
2142 		 * (vio_net_resource_unreg()).
2143 		 * Also, we access ring->hw_rh in vnet_rx_ring_stat().
2144 		 * Note that for rings mapped to Hybrid resource, though the
2145 		 * rings are statically registered with the mac layer, its
2146 		 * hardware ring mapping (ringp->hw_rh) can be torn down in
2147 		 * vnet_unbind_hwrings() while the kstat operation is in
2148 		 * progress. To protect against this, we hold a reference to
2149 		 * the resource in FDB; this ensures that the thread in
2150 		 * vio_net_resource_unreg() waits for the reference to be
2151 		 * dropped before unbinding the ring.
2152 		 *
2153 		 * We don't need to do this for rings mapped to LDC resources.
2154 		 * These rings are registered/unregistered dynamically with
2155 		 * the mac layer and so any attempt to unregister the ring
2156 		 * while kstat operation is in progress will block in
2157 		 * mac_group_rem_ring(). Thus implicitly protects the
2158 		 * resource (ringp->hw_rh) from disappearing.
2159 		 */
2160 
2161 		if (cap_rings->mr_type == MAC_RING_TYPE_RX) {
2162 			cap_rings->mr_group_type = MAC_GROUP_TYPE_STATIC;
2163 
2164 			/*
2165 			 * The ring_cnt for rx grp is initialized in
2166 			 * vnet_ring_grp_init(). Later, the ring_cnt gets
2167 			 * updated dynamically whenever LDC resources are added
2168 			 * or removed.
2169 			 */
2170 			cap_rings->mr_rnum = vnetp->rx_grp[0].ring_cnt;
2171 			cap_rings->mr_rget = vnet_get_ring;
2172 
2173 			cap_rings->mr_gnum = VNET_NUM_PSEUDO_GROUPS;
2174 			cap_rings->mr_gget = vnet_get_group;
2175 			cap_rings->mr_gaddring = NULL;
2176 			cap_rings->mr_gremring = NULL;
2177 		} else {
2178 			cap_rings->mr_group_type = MAC_GROUP_TYPE_STATIC;
2179 
2180 			/*
2181 			 * The ring_cnt for tx grp is initialized in
2182 			 * vnet_ring_grp_init() and remains constant, as we
2183 			 * do not support dymanic tx rings for now.
2184 			 */
2185 			cap_rings->mr_rnum = vnetp->tx_grp[0].ring_cnt;
2186 			cap_rings->mr_rget = vnet_get_ring;
2187 
2188 			/*
2189 			 * Transmit rings are not grouped; i.e, the number of
2190 			 * transmit ring groups advertised should be set to 0.
2191 			 */
2192 			cap_rings->mr_gnum = 0;
2193 
2194 			cap_rings->mr_gget = vnet_get_group;
2195 			cap_rings->mr_gaddring = NULL;
2196 			cap_rings->mr_gremring = NULL;
2197 		}
2198 		return (B_TRUE);
2199 
2200 	}
2201 
2202 	default:
2203 		break;
2204 
2205 	}
2206 
2207 	return (B_FALSE);
2208 }
2209 
2210 /*
2211  * Callback funtion for MAC layer to get ring information.
2212  */
2213 static void
2214 vnet_get_ring(void *arg, mac_ring_type_t rtype, const int g_index,
2215     const int r_index, mac_ring_info_t *infop, mac_ring_handle_t r_handle)
2216 {
2217 	vnet_t	*vnetp = arg;
2218 
2219 	switch (rtype) {
2220 
2221 	case MAC_RING_TYPE_RX: {
2222 
2223 		vnet_pseudo_rx_group_t	*rx_grp;
2224 		vnet_pseudo_rx_ring_t	*rx_ringp;
2225 		mac_intr_t		*mintr;
2226 
2227 		/* We advertised only one RX group */
2228 		ASSERT(g_index == 0);
2229 		rx_grp = &vnetp->rx_grp[g_index];
2230 
2231 		/* Check the current # of rings in the rx group */
2232 		ASSERT((r_index >= 0) && (r_index < rx_grp->max_ring_cnt));
2233 
2234 		/* Get the ring based on the index */
2235 		rx_ringp = &rx_grp->rings[r_index];
2236 
2237 		rx_ringp->handle = r_handle;
2238 		/*
2239 		 * Note: we don't need to save the incoming r_index in rx_ring,
2240 		 * as vnet_ring_grp_init() would have initialized the index for
2241 		 * each ring in the array.
2242 		 */
2243 		rx_ringp->grp = rx_grp;
2244 		rx_ringp->vnetp = vnetp;
2245 
2246 		mintr = &infop->mri_intr;
2247 		mintr->mi_handle = (mac_intr_handle_t)rx_ringp;
2248 		mintr->mi_enable = (mac_intr_enable_t)vnet_ring_enable_intr;
2249 		mintr->mi_disable = (mac_intr_disable_t)vnet_ring_disable_intr;
2250 
2251 		infop->mri_driver = (mac_ring_driver_t)rx_ringp;
2252 		infop->mri_start = vnet_rx_ring_start;
2253 		infop->mri_stop = vnet_rx_ring_stop;
2254 		infop->mri_stat = vnet_rx_ring_stat;
2255 
2256 		/* Set the poll function, as this is an rx ring */
2257 		infop->mri_poll = vnet_rx_poll;
2258 		/*
2259 		 * MAC_RING_RX_ENQUEUE bit needed to be set for nxge
2260 		 * which was not sending packet chains in interrupt
2261 		 * context. For such drivers, packets are queued in
2262 		 * Rx soft rings so that we get a chance to switch
2263 		 * into a polling mode under backlog. This bug (not
2264 		 * sending packet chains) has now been fixed. Once
2265 		 * the performance impact is measured, this change
2266 		 * will be removed.
2267 		 */
2268 		infop->mri_flags = (vnet_mac_rx_queuing ?
2269 		    MAC_RING_RX_ENQUEUE : 0);
2270 		break;
2271 	}
2272 
2273 	case MAC_RING_TYPE_TX: {
2274 		vnet_pseudo_tx_group_t	*tx_grp;
2275 		vnet_pseudo_tx_ring_t	*tx_ringp;
2276 
2277 		/*
2278 		 * No need to check grp index; mac layer passes -1 for it.
2279 		 */
2280 		tx_grp = &vnetp->tx_grp[0];
2281 
2282 		/* Check the # of rings in the tx group */
2283 		ASSERT((r_index >= 0) && (r_index < tx_grp->ring_cnt));
2284 
2285 		/* Get the ring based on the index */
2286 		tx_ringp = &tx_grp->rings[r_index];
2287 
2288 		tx_ringp->handle = r_handle;
2289 		tx_ringp->index = r_index;
2290 		tx_ringp->grp = tx_grp;
2291 		tx_ringp->vnetp = vnetp;
2292 
2293 		infop->mri_driver = (mac_ring_driver_t)tx_ringp;
2294 		infop->mri_start = vnet_tx_ring_start;
2295 		infop->mri_stop = vnet_tx_ring_stop;
2296 		infop->mri_stat = vnet_tx_ring_stat;
2297 
2298 		/* Set the transmit function, as this is a tx ring */
2299 		infop->mri_tx = vnet_tx_ring_send;
2300 		/*
2301 		 * MAC_RING_TX_SERIALIZE bit needs to be set while
2302 		 * hybridIO is enabled to workaround tx lock
2303 		 * contention issues in nxge.
2304 		 */
2305 		infop->mri_flags = (vnet_mac_tx_serialize ?
2306 		    MAC_RING_TX_SERIALIZE : 0);
2307 		break;
2308 	}
2309 
2310 	default:
2311 		break;
2312 	}
2313 }
2314 
2315 /*
2316  * Callback funtion for MAC layer to get group information.
2317  */
2318 static void
2319 vnet_get_group(void *arg, mac_ring_type_t type, const int index,
2320 	mac_group_info_t *infop, mac_group_handle_t handle)
2321 {
2322 	vnet_t	*vnetp = (vnet_t *)arg;
2323 
2324 	switch (type) {
2325 
2326 	case MAC_RING_TYPE_RX:
2327 	{
2328 		vnet_pseudo_rx_group_t	*rx_grp;
2329 
2330 		/* We advertised only one RX group */
2331 		ASSERT(index == 0);
2332 
2333 		rx_grp = &vnetp->rx_grp[index];
2334 		rx_grp->handle = handle;
2335 		rx_grp->index = index;
2336 		rx_grp->vnetp = vnetp;
2337 
2338 		infop->mgi_driver = (mac_group_driver_t)rx_grp;
2339 		infop->mgi_start = NULL;
2340 		infop->mgi_stop = NULL;
2341 		infop->mgi_addmac = vnet_addmac;
2342 		infop->mgi_remmac = vnet_remmac;
2343 		infop->mgi_count = rx_grp->ring_cnt;
2344 
2345 		break;
2346 	}
2347 
2348 	case MAC_RING_TYPE_TX:
2349 	{
2350 		vnet_pseudo_tx_group_t	*tx_grp;
2351 
2352 		/* We advertised only one TX group */
2353 		ASSERT(index == 0);
2354 
2355 		tx_grp = &vnetp->tx_grp[index];
2356 		tx_grp->handle = handle;
2357 		tx_grp->index = index;
2358 		tx_grp->vnetp = vnetp;
2359 
2360 		infop->mgi_driver = (mac_group_driver_t)tx_grp;
2361 		infop->mgi_start = NULL;
2362 		infop->mgi_stop = NULL;
2363 		infop->mgi_addmac = NULL;
2364 		infop->mgi_remmac = NULL;
2365 		infop->mgi_count = VNET_NUM_PSEUDO_TXRINGS;
2366 
2367 		break;
2368 	}
2369 
2370 	default:
2371 		break;
2372 
2373 	}
2374 }
2375 
2376 static int
2377 vnet_rx_ring_start(mac_ring_driver_t arg, uint64_t mr_gen_num)
2378 {
2379 	vnet_pseudo_rx_ring_t	*rx_ringp = (vnet_pseudo_rx_ring_t *)arg;
2380 	int			err;
2381 
2382 	/*
2383 	 * If this ring is mapped to a LDC resource, simply mark the state to
2384 	 * indicate the ring is started and return.
2385 	 */
2386 	if ((rx_ringp->state &
2387 	    (VNET_RXRING_LDC_SERVICE|VNET_RXRING_LDC_GUEST)) != 0) {
2388 		rx_ringp->gen_num = mr_gen_num;
2389 		rx_ringp->state |= VNET_RXRING_STARTED;
2390 		return (0);
2391 	}
2392 
2393 	ASSERT((rx_ringp->state & VNET_RXRING_HYBRID) != 0);
2394 
2395 	/*
2396 	 * This must be a ring reserved for a hwring. If the hwring is not
2397 	 * bound yet, simply mark the state to indicate the ring is started and
2398 	 * return. If and when a hybrid resource is activated for this vnet
2399 	 * device, we will bind the hwring and start it then. If a hwring is
2400 	 * already bound, start it now.
2401 	 */
2402 	if (rx_ringp->hw_rh == NULL) {
2403 		rx_ringp->gen_num = mr_gen_num;
2404 		rx_ringp->state |= VNET_RXRING_STARTED;
2405 		return (0);
2406 	}
2407 
2408 	err = mac_hwring_start(rx_ringp->hw_rh);
2409 	if (err == 0) {
2410 		rx_ringp->gen_num = mr_gen_num;
2411 		rx_ringp->state |= VNET_RXRING_STARTED;
2412 	} else {
2413 		err = ENXIO;
2414 	}
2415 
2416 	return (err);
2417 }
2418 
2419 static void
2420 vnet_rx_ring_stop(mac_ring_driver_t arg)
2421 {
2422 	vnet_pseudo_rx_ring_t	*rx_ringp = (vnet_pseudo_rx_ring_t *)arg;
2423 
2424 	/*
2425 	 * If this ring is mapped to a LDC resource, simply mark the state to
2426 	 * indicate the ring is now stopped and return.
2427 	 */
2428 	if ((rx_ringp->state &
2429 	    (VNET_RXRING_LDC_SERVICE|VNET_RXRING_LDC_GUEST)) != 0) {
2430 		rx_ringp->state &= ~VNET_RXRING_STARTED;
2431 		return;
2432 	}
2433 
2434 	ASSERT((rx_ringp->state & VNET_RXRING_HYBRID) != 0);
2435 
2436 	/*
2437 	 * This must be a ring reserved for a hwring. If the hwring is not
2438 	 * bound yet, simply mark the state to indicate the ring is stopped and
2439 	 * return. If a hwring is already bound, stop it now.
2440 	 */
2441 	if (rx_ringp->hw_rh == NULL) {
2442 		rx_ringp->state &= ~VNET_RXRING_STARTED;
2443 		return;
2444 	}
2445 
2446 	mac_hwring_stop(rx_ringp->hw_rh);
2447 	rx_ringp->state &= ~VNET_RXRING_STARTED;
2448 }
2449 
2450 static int
2451 vnet_rx_ring_stat(mac_ring_driver_t rdriver, uint_t stat, uint64_t *val)
2452 {
2453 	vnet_pseudo_rx_ring_t	*rx_ringp = (vnet_pseudo_rx_ring_t *)rdriver;
2454 	vnet_t			*vnetp = (vnet_t *)rx_ringp->vnetp;
2455 	vnet_res_t		*vresp;
2456 	mac_register_t		*macp;
2457 	mac_callbacks_t		*cbp;
2458 
2459 	/*
2460 	 * Refer to vnet_m_capab() function for detailed comments on ring
2461 	 * synchronization.
2462 	 */
2463 	if ((rx_ringp->state & VNET_RXRING_HYBRID) != 0) {
2464 		READ_ENTER(&vnetp->vsw_fp_rw);
2465 		if (vnetp->hio_fp == NULL) {
2466 			RW_EXIT(&vnetp->vsw_fp_rw);
2467 			return (0);
2468 		}
2469 
2470 		VNET_FDBE_REFHOLD(vnetp->hio_fp);
2471 		RW_EXIT(&vnetp->vsw_fp_rw);
2472 		(void) mac_hwring_getstat(rx_ringp->hw_rh, stat, val);
2473 		VNET_FDBE_REFRELE(vnetp->hio_fp);
2474 		return (0);
2475 	}
2476 
2477 	ASSERT((rx_ringp->state &
2478 	    (VNET_RXRING_LDC_SERVICE|VNET_RXRING_LDC_GUEST)) != 0);
2479 	vresp = (vnet_res_t *)rx_ringp->hw_rh;
2480 	macp = &vresp->macreg;
2481 	cbp = macp->m_callbacks;
2482 
2483 	cbp->mc_getstat(macp->m_driver, stat, val);
2484 
2485 	return (0);
2486 }
2487 
2488 /* ARGSUSED */
2489 static int
2490 vnet_tx_ring_start(mac_ring_driver_t arg, uint64_t mr_gen_num)
2491 {
2492 	vnet_pseudo_tx_ring_t	*tx_ringp = (vnet_pseudo_tx_ring_t *)arg;
2493 
2494 	tx_ringp->state |= VNET_TXRING_STARTED;
2495 	return (0);
2496 }
2497 
2498 static void
2499 vnet_tx_ring_stop(mac_ring_driver_t arg)
2500 {
2501 	vnet_pseudo_tx_ring_t	*tx_ringp = (vnet_pseudo_tx_ring_t *)arg;
2502 
2503 	tx_ringp->state &= ~VNET_TXRING_STARTED;
2504 }
2505 
2506 static int
2507 vnet_tx_ring_stat(mac_ring_driver_t rdriver, uint_t stat, uint64_t *val)
2508 {
2509 	vnet_pseudo_tx_ring_t	*tx_ringp = (vnet_pseudo_tx_ring_t *)rdriver;
2510 	vnet_tx_ring_stats_t	*statsp;
2511 
2512 	statsp = &tx_ringp->tx_ring_stats;
2513 
2514 	switch (stat) {
2515 	case MAC_STAT_OPACKETS:
2516 		*val = statsp->opackets;
2517 		break;
2518 
2519 	case MAC_STAT_OBYTES:
2520 		*val = statsp->obytes;
2521 		break;
2522 
2523 	default:
2524 		*val = 0;
2525 		return (ENOTSUP);
2526 	}
2527 
2528 	return (0);
2529 }
2530 
2531 /*
2532  * Disable polling for a ring and enable its interrupt.
2533  */
2534 static int
2535 vnet_ring_enable_intr(void *arg)
2536 {
2537 	vnet_pseudo_rx_ring_t	*rx_ringp = (vnet_pseudo_rx_ring_t *)arg;
2538 	vnet_res_t		*vresp;
2539 
2540 	if (rx_ringp->hw_rh == NULL) {
2541 		/*
2542 		 * Ring enable intr func is being invoked, but the ring is
2543 		 * not bound to any underlying resource ? This must be a ring
2544 		 * reserved for Hybrid resource and no such resource has been
2545 		 * assigned to this vnet device yet. We simply return success.
2546 		 */
2547 		ASSERT((rx_ringp->state & VNET_RXRING_HYBRID) != 0);
2548 		return (0);
2549 	}
2550 
2551 	/*
2552 	 * The rx ring has been bound to either a LDC or a Hybrid resource.
2553 	 * Call the appropriate function to enable interrupts for the ring.
2554 	 */
2555 	if (rx_ringp->state & VNET_RXRING_HYBRID) {
2556 		return (mac_hwring_enable_intr(rx_ringp->hw_rh));
2557 	} else {
2558 		vresp = (vnet_res_t *)rx_ringp->hw_rh;
2559 		return (vgen_enable_intr(vresp->macreg.m_driver));
2560 	}
2561 }
2562 
2563 /*
2564  * Enable polling for a ring and disable its interrupt.
2565  */
2566 static int
2567 vnet_ring_disable_intr(void *arg)
2568 {
2569 	vnet_pseudo_rx_ring_t	*rx_ringp = (vnet_pseudo_rx_ring_t *)arg;
2570 	vnet_res_t		*vresp;
2571 
2572 	if (rx_ringp->hw_rh == NULL) {
2573 		/*
2574 		 * Ring disable intr func is being invoked, but the ring is
2575 		 * not bound to any underlying resource ? This must be a ring
2576 		 * reserved for Hybrid resource and no such resource has been
2577 		 * assigned to this vnet device yet. We simply return success.
2578 		 */
2579 		ASSERT((rx_ringp->state & VNET_RXRING_HYBRID) != 0);
2580 		return (0);
2581 	}
2582 
2583 	/*
2584 	 * The rx ring has been bound to either a LDC or a Hybrid resource.
2585 	 * Call the appropriate function to disable interrupts for the ring.
2586 	 */
2587 	if (rx_ringp->state & VNET_RXRING_HYBRID) {
2588 		return (mac_hwring_disable_intr(rx_ringp->hw_rh));
2589 	} else {
2590 		vresp = (vnet_res_t *)rx_ringp->hw_rh;
2591 		return (vgen_disable_intr(vresp->macreg.m_driver));
2592 	}
2593 }
2594 
2595 /*
2596  * Poll 'bytes_to_pickup' bytes of message from the rx ring.
2597  */
2598 static mblk_t *
2599 vnet_rx_poll(void *arg, int bytes_to_pickup)
2600 {
2601 	vnet_pseudo_rx_ring_t	*rx_ringp = (vnet_pseudo_rx_ring_t *)arg;
2602 	mblk_t			*mp = NULL;
2603 	vnet_res_t		*vresp;
2604 	vnet_t			*vnetp = rx_ringp->vnetp;
2605 
2606 	if (rx_ringp->hw_rh == NULL) {
2607 		return (NULL);
2608 	}
2609 
2610 	if (rx_ringp->state & VNET_RXRING_HYBRID) {
2611 		mp = mac_hwring_poll(rx_ringp->hw_rh, bytes_to_pickup);
2612 		/*
2613 		 * Packets received over a hybrid resource need additional
2614 		 * processing to remove the tag, for the pvid case. The
2615 		 * underlying resource is not aware of the vnet's pvid and thus
2616 		 * packets are received with the vlan tag in the header; unlike
2617 		 * packets that are received over a ldc channel in which case
2618 		 * the peer vnet/vsw would have already removed the tag.
2619 		 */
2620 		if (vnetp->pvid != vnetp->default_vlan_id) {
2621 			vnet_rx_frames_untag(vnetp->pvid, &mp);
2622 		}
2623 	} else {
2624 		vresp = (vnet_res_t *)rx_ringp->hw_rh;
2625 		mp = vgen_rx_poll(vresp->macreg.m_driver, bytes_to_pickup);
2626 	}
2627 	return (mp);
2628 }
2629 
2630 /* ARGSUSED */
2631 void
2632 vnet_hio_rx_cb(void *arg, mac_resource_handle_t mrh, mblk_t *mp,
2633 	boolean_t loopback)
2634 {
2635 	vnet_t			*vnetp = (vnet_t *)arg;
2636 	vnet_pseudo_rx_ring_t	*ringp = (vnet_pseudo_rx_ring_t *)mrh;
2637 
2638 	/*
2639 	 * Packets received over a hybrid resource need additional processing
2640 	 * to remove the tag, for the pvid case. The underlying resource is
2641 	 * not aware of the vnet's pvid and thus packets are received with the
2642 	 * vlan tag in the header; unlike packets that are received over a ldc
2643 	 * channel in which case the peer vnet/vsw would have already removed
2644 	 * the tag.
2645 	 */
2646 	if (vnetp->pvid != vnetp->default_vlan_id) {
2647 		vnet_rx_frames_untag(vnetp->pvid, &mp);
2648 		if (mp == NULL) {
2649 			return;
2650 		}
2651 	}
2652 	mac_rx_ring(vnetp->mh, ringp->handle, mp, ringp->gen_num);
2653 }
2654 
2655 static int
2656 vnet_addmac(void *arg, const uint8_t *mac_addr)
2657 {
2658 	vnet_pseudo_rx_group_t  *rx_grp = (vnet_pseudo_rx_group_t *)arg;
2659 	vnet_t			*vnetp;
2660 
2661 	vnetp = rx_grp->vnetp;
2662 
2663 	if (bcmp(mac_addr, vnetp->curr_macaddr, ETHERADDRL) == 0) {
2664 		return (0);
2665 	}
2666 
2667 	cmn_err(CE_CONT, "!vnet%d: %s: Multiple macaddr unsupported\n",
2668 	    vnetp->instance, __func__);
2669 	return (EINVAL);
2670 }
2671 
2672 static int
2673 vnet_remmac(void *arg, const uint8_t *mac_addr)
2674 {
2675 	vnet_pseudo_rx_group_t  *rx_grp = (vnet_pseudo_rx_group_t *)arg;
2676 	vnet_t			*vnetp;
2677 
2678 	vnetp = rx_grp->vnetp;
2679 
2680 	if (bcmp(mac_addr, vnetp->curr_macaddr, ETHERADDRL) == 0) {
2681 		return (0);
2682 	}
2683 
2684 	cmn_err(CE_CONT, "!vnet%d: %s: Invalid macaddr: %s\n",
2685 	    vnetp->instance, __func__, ether_sprintf((void *)mac_addr));
2686 	return (EINVAL);
2687 }
2688 
2689 int
2690 vnet_hio_mac_init(vnet_t *vnetp, char *ifname)
2691 {
2692 	mac_handle_t		mh;
2693 	mac_client_handle_t	mch = NULL;
2694 	mac_unicast_handle_t	muh = NULL;
2695 	mac_diag_t		diag;
2696 	mac_register_t		*macp;
2697 	char			client_name[MAXNAMELEN];
2698 	int			rv;
2699 	uint16_t		mac_flags = MAC_UNICAST_TAG_DISABLE |
2700 	    MAC_UNICAST_STRIP_DISABLE | MAC_UNICAST_PRIMARY;
2701 	vio_net_callbacks_t	vcb;
2702 	ether_addr_t		rem_addr =
2703 		{ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
2704 	uint32_t		retries = 0;
2705 
2706 	if ((macp = mac_alloc(MAC_VERSION)) == NULL) {
2707 		return (EAGAIN);
2708 	}
2709 
2710 	do {
2711 		rv = mac_open_by_linkname(ifname, &mh);
2712 		if (rv == 0) {
2713 			break;
2714 		}
2715 		if (rv != ENOENT || (retries++ >= vnet_mac_open_retries)) {
2716 			mac_free(macp);
2717 			return (rv);
2718 		}
2719 		drv_usecwait(vnet_mac_open_delay);
2720 	} while (rv == ENOENT);
2721 
2722 	vnetp->hio_mh = mh;
2723 
2724 	(void) snprintf(client_name, MAXNAMELEN, "vnet%d-%s", vnetp->instance,
2725 	    ifname);
2726 	rv = mac_client_open(mh, &mch, client_name, MAC_OPEN_FLAGS_EXCLUSIVE);
2727 	if (rv != 0) {
2728 		goto fail;
2729 	}
2730 	vnetp->hio_mch = mch;
2731 
2732 	rv = mac_unicast_add(mch, vnetp->curr_macaddr, mac_flags, &muh, 0,
2733 	    &diag);
2734 	if (rv != 0) {
2735 		goto fail;
2736 	}
2737 	vnetp->hio_muh = muh;
2738 
2739 	macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
2740 	macp->m_driver = vnetp;
2741 	macp->m_dip = NULL;
2742 	macp->m_src_addr = NULL;
2743 	macp->m_callbacks = &vnet_hio_res_callbacks;
2744 	macp->m_min_sdu = 0;
2745 	macp->m_max_sdu = ETHERMTU;
2746 
2747 	rv = vio_net_resource_reg(macp, VIO_NET_RES_HYBRID,
2748 	    vnetp->curr_macaddr, rem_addr, &vnetp->hio_vhp, &vcb);
2749 	if (rv != 0) {
2750 		goto fail;
2751 	}
2752 	mac_free(macp);
2753 
2754 	/* add the recv callback */
2755 	mac_rx_set(vnetp->hio_mch, vnet_hio_rx_cb, vnetp);
2756 
2757 	return (0);
2758 
2759 fail:
2760 	mac_free(macp);
2761 	vnet_hio_mac_cleanup(vnetp);
2762 	return (1);
2763 }
2764 
2765 void
2766 vnet_hio_mac_cleanup(vnet_t *vnetp)
2767 {
2768 	if (vnetp->hio_vhp != NULL) {
2769 		vio_net_resource_unreg(vnetp->hio_vhp);
2770 		vnetp->hio_vhp = NULL;
2771 	}
2772 
2773 	if (vnetp->hio_muh != NULL) {
2774 		(void) mac_unicast_remove(vnetp->hio_mch, vnetp->hio_muh);
2775 		vnetp->hio_muh = NULL;
2776 	}
2777 
2778 	if (vnetp->hio_mch != NULL) {
2779 		mac_client_close(vnetp->hio_mch, 0);
2780 		vnetp->hio_mch = NULL;
2781 	}
2782 
2783 	if (vnetp->hio_mh != NULL) {
2784 		mac_close(vnetp->hio_mh);
2785 		vnetp->hio_mh = NULL;
2786 	}
2787 }
2788 
2789 /* Bind pseudo rings to hwrings */
2790 static int
2791 vnet_bind_hwrings(vnet_t *vnetp)
2792 {
2793 	mac_ring_handle_t	hw_rh[VNET_NUM_HYBRID_RINGS];
2794 	mac_perim_handle_t	mph1;
2795 	vnet_pseudo_rx_group_t	*rx_grp;
2796 	vnet_pseudo_rx_ring_t	*rx_ringp;
2797 	vnet_pseudo_tx_group_t	*tx_grp;
2798 	vnet_pseudo_tx_ring_t	*tx_ringp;
2799 	int			hw_ring_cnt;
2800 	int			i;
2801 	int			rv;
2802 
2803 	mac_perim_enter_by_mh(vnetp->hio_mh, &mph1);
2804 
2805 	/* Get the list of the underlying RX rings. */
2806 	hw_ring_cnt = mac_hwrings_get(vnetp->hio_mch, &vnetp->rx_hwgh, hw_rh,
2807 	    MAC_RING_TYPE_RX);
2808 
2809 	/* We expect the the # of hw rx rings to match VNET_NUM_HYBRID_RINGS */
2810 	if (hw_ring_cnt != VNET_NUM_HYBRID_RINGS) {
2811 		cmn_err(CE_WARN,
2812 		    "!vnet%d: vnet_bind_hwrings: bad rx hw_ring_cnt(%d)\n",
2813 		    vnetp->instance, hw_ring_cnt);
2814 		goto fail;
2815 	}
2816 
2817 	if (vnetp->rx_hwgh != NULL) {
2818 		/*
2819 		 * Quiesce the HW ring and the mac srs on the ring. Note
2820 		 * that the HW ring will be restarted when the pseudo ring
2821 		 * is started. At that time all the packets will be
2822 		 * directly passed up to the pseudo RX ring and handled
2823 		 * by mac srs created over the pseudo RX ring.
2824 		 */
2825 		mac_rx_client_quiesce(vnetp->hio_mch);
2826 		mac_srs_perm_quiesce(vnetp->hio_mch, B_TRUE);
2827 	}
2828 
2829 	/*
2830 	 * Bind the pseudo rings to the hwrings and start the hwrings.
2831 	 * Note we don't need to register these with the upper mac, as we have
2832 	 * statically exported these pseudo rxrings which are reserved for
2833 	 * rxrings of Hybrid resource.
2834 	 */
2835 	rx_grp = &vnetp->rx_grp[0];
2836 	for (i = 0; i < VNET_NUM_HYBRID_RINGS; i++) {
2837 		/* Pick the rxrings reserved for Hybrid resource */
2838 		rx_ringp = &rx_grp->rings[i + VNET_HYBRID_RXRING_INDEX];
2839 
2840 		/* Store the hw ring handle */
2841 		rx_ringp->hw_rh = hw_rh[i];
2842 
2843 		/* Bind the pseudo ring to the underlying hwring */
2844 		mac_hwring_setup(rx_ringp->hw_rh,
2845 		    (mac_resource_handle_t)rx_ringp, NULL);
2846 
2847 		/* Start the hwring if needed */
2848 		if (rx_ringp->state & VNET_RXRING_STARTED) {
2849 			rv = mac_hwring_start(rx_ringp->hw_rh);
2850 			if (rv != 0) {
2851 				mac_hwring_teardown(rx_ringp->hw_rh);
2852 				rx_ringp->hw_rh = NULL;
2853 				goto fail;
2854 			}
2855 		}
2856 	}
2857 
2858 	/* Get the list of the underlying TX rings. */
2859 	hw_ring_cnt = mac_hwrings_get(vnetp->hio_mch, &vnetp->tx_hwgh, hw_rh,
2860 	    MAC_RING_TYPE_TX);
2861 
2862 	/* We expect the # of hw tx rings to match VNET_NUM_HYBRID_RINGS */
2863 	if (hw_ring_cnt != VNET_NUM_HYBRID_RINGS) {
2864 		cmn_err(CE_WARN,
2865 		    "!vnet%d: vnet_bind_hwrings: bad tx hw_ring_cnt(%d)\n",
2866 		    vnetp->instance, hw_ring_cnt);
2867 		goto fail;
2868 	}
2869 
2870 	/*
2871 	 * Now map the pseudo txrings to the hw txrings. Note we don't need
2872 	 * to register these with the upper mac, as we have statically exported
2873 	 * these rings. Note that these rings will continue to be used for LDC
2874 	 * resources to peer vnets and vswitch (shared ring).
2875 	 */
2876 	tx_grp = &vnetp->tx_grp[0];
2877 	for (i = 0; i < tx_grp->ring_cnt; i++) {
2878 		tx_ringp = &tx_grp->rings[i];
2879 		tx_ringp->hw_rh = hw_rh[i];
2880 		tx_ringp->state |= VNET_TXRING_HYBRID;
2881 	}
2882 	tx_grp->tx_notify_handle =
2883 	    mac_client_tx_notify(vnetp->hio_mch, vnet_tx_ring_update, vnetp);
2884 
2885 	mac_perim_exit(mph1);
2886 	return (0);
2887 
2888 fail:
2889 	mac_perim_exit(mph1);
2890 	vnet_unbind_hwrings(vnetp);
2891 	return (1);
2892 }
2893 
2894 /* Unbind pseudo rings from hwrings */
2895 static void
2896 vnet_unbind_hwrings(vnet_t *vnetp)
2897 {
2898 	mac_perim_handle_t	mph1;
2899 	vnet_pseudo_rx_ring_t	*rx_ringp;
2900 	vnet_pseudo_rx_group_t	*rx_grp;
2901 	vnet_pseudo_tx_group_t	*tx_grp;
2902 	vnet_pseudo_tx_ring_t	*tx_ringp;
2903 	int			i;
2904 
2905 	mac_perim_enter_by_mh(vnetp->hio_mh, &mph1);
2906 
2907 	tx_grp = &vnetp->tx_grp[0];
2908 	for (i = 0; i < VNET_NUM_HYBRID_RINGS; i++) {
2909 		tx_ringp = &tx_grp->rings[i];
2910 		if (tx_ringp->state & VNET_TXRING_HYBRID) {
2911 			tx_ringp->state &= ~VNET_TXRING_HYBRID;
2912 			tx_ringp->hw_rh = NULL;
2913 		}
2914 	}
2915 	(void) mac_client_tx_notify(vnetp->hio_mch, NULL,
2916 	    tx_grp->tx_notify_handle);
2917 
2918 	rx_grp = &vnetp->rx_grp[0];
2919 	for (i = 0; i < VNET_NUM_HYBRID_RINGS; i++) {
2920 		rx_ringp = &rx_grp->rings[i + VNET_HYBRID_RXRING_INDEX];
2921 		if (rx_ringp->hw_rh != NULL) {
2922 			/* Stop the hwring */
2923 			mac_hwring_stop(rx_ringp->hw_rh);
2924 
2925 			/* Teardown the hwring */
2926 			mac_hwring_teardown(rx_ringp->hw_rh);
2927 			rx_ringp->hw_rh = NULL;
2928 		}
2929 	}
2930 
2931 	if (vnetp->rx_hwgh != NULL) {
2932 		vnetp->rx_hwgh = NULL;
2933 		/*
2934 		 * First clear the permanent-quiesced flag of the RX srs then
2935 		 * restart the HW ring and the mac srs on the ring.
2936 		 */
2937 		mac_srs_perm_quiesce(vnetp->hio_mch, B_FALSE);
2938 		mac_rx_client_restart(vnetp->hio_mch);
2939 	}
2940 
2941 	mac_perim_exit(mph1);
2942 }
2943 
2944 /* Bind pseudo ring to a LDC resource */
2945 static int
2946 vnet_bind_vgenring(vnet_res_t *vresp)
2947 {
2948 	vnet_t			*vnetp;
2949 	vnet_pseudo_rx_group_t	*rx_grp;
2950 	vnet_pseudo_rx_ring_t	*rx_ringp;
2951 	mac_perim_handle_t	mph1;
2952 	int			rv;
2953 	int			type;
2954 
2955 	vnetp = vresp->vnetp;
2956 	type = vresp->type;
2957 	rx_grp = &vnetp->rx_grp[0];
2958 
2959 	if (type == VIO_NET_RES_LDC_SERVICE) {
2960 		/*
2961 		 * Ring Index 0 is the default ring in the group and is
2962 		 * reserved for LDC_SERVICE in vnet_ring_grp_init(). This ring
2963 		 * is allocated statically and is reported to the mac layer
2964 		 * in vnet_m_capab(). So, all we need to do here, is save a
2965 		 * reference to the associated vresp.
2966 		 */
2967 		rx_ringp = &rx_grp->rings[0];
2968 		rx_ringp->hw_rh = (mac_ring_handle_t)vresp;
2969 		vresp->rx_ringp = (void *)rx_ringp;
2970 		return (0);
2971 	}
2972 	ASSERT(type == VIO_NET_RES_LDC_GUEST);
2973 
2974 	mac_perim_enter_by_mh(vnetp->mh, &mph1);
2975 
2976 	rx_ringp = vnet_alloc_pseudo_rx_ring(vnetp);
2977 	if (rx_ringp == NULL) {
2978 		cmn_err(CE_WARN, "!vnet%d: Failed to allocate pseudo rx ring",
2979 		    vnetp->instance);
2980 		goto fail;
2981 	}
2982 
2983 	/* Store the LDC resource itself as the ring handle */
2984 	rx_ringp->hw_rh = (mac_ring_handle_t)vresp;
2985 
2986 	/*
2987 	 * Save a reference to the ring in the resource for lookup during
2988 	 * unbind. Note this is only done for LDC resources. We don't need this
2989 	 * in the case of a Hybrid resource (see vnet_bind_hwrings()), as its
2990 	 * rx rings are mapped to reserved pseudo rx rings (index 1 and 2).
2991 	 */
2992 	vresp->rx_ringp = (void *)rx_ringp;
2993 	rx_ringp->state |= VNET_RXRING_LDC_GUEST;
2994 
2995 	/* Register the pseudo ring with upper-mac */
2996 	rv = mac_group_add_ring(rx_grp->handle, rx_ringp->index);
2997 	if (rv != 0) {
2998 		rx_ringp->state &= ~VNET_RXRING_LDC_GUEST;
2999 		rx_ringp->hw_rh = NULL;
3000 		vnet_free_pseudo_rx_ring(vnetp, rx_ringp);
3001 		goto fail;
3002 	}
3003 
3004 	mac_perim_exit(mph1);
3005 	return (0);
3006 fail:
3007 	mac_perim_exit(mph1);
3008 	return (1);
3009 }
3010 
3011 /* Unbind pseudo ring from a LDC resource */
3012 static void
3013 vnet_unbind_vgenring(vnet_res_t *vresp)
3014 {
3015 	vnet_t			*vnetp;
3016 	vnet_pseudo_rx_group_t	*rx_grp;
3017 	vnet_pseudo_rx_ring_t	*rx_ringp;
3018 	mac_perim_handle_t	mph1;
3019 	int			type;
3020 
3021 	vnetp = vresp->vnetp;
3022 	type = vresp->type;
3023 	rx_grp = &vnetp->rx_grp[0];
3024 
3025 	if (vresp->rx_ringp == NULL) {
3026 		return;
3027 	}
3028 
3029 	if (type == VIO_NET_RES_LDC_SERVICE) {
3030 		/*
3031 		 * Ring Index 0 is the default ring in the group and is
3032 		 * reserved for LDC_SERVICE in vnet_ring_grp_init(). This ring
3033 		 * is allocated statically and is reported to the mac layer
3034 		 * in vnet_m_capab(). So, all we need to do here, is remove its
3035 		 * reference to the associated vresp.
3036 		 */
3037 		rx_ringp = &rx_grp->rings[0];
3038 		rx_ringp->hw_rh = NULL;
3039 		vresp->rx_ringp = NULL;
3040 		return;
3041 	}
3042 	ASSERT(type == VIO_NET_RES_LDC_GUEST);
3043 
3044 	mac_perim_enter_by_mh(vnetp->mh, &mph1);
3045 
3046 	rx_ringp = (vnet_pseudo_rx_ring_t *)vresp->rx_ringp;
3047 	vresp->rx_ringp = NULL;
3048 
3049 	if (rx_ringp != NULL && (rx_ringp->state & VNET_RXRING_LDC_GUEST)) {
3050 		/* Unregister the pseudo ring with upper-mac */
3051 		mac_group_rem_ring(rx_grp->handle, rx_ringp->handle);
3052 
3053 		rx_ringp->hw_rh = NULL;
3054 		rx_ringp->state &= ~VNET_RXRING_LDC_GUEST;
3055 
3056 		/* Free the pseudo rx ring */
3057 		vnet_free_pseudo_rx_ring(vnetp, rx_ringp);
3058 	}
3059 
3060 	mac_perim_exit(mph1);
3061 }
3062 
3063 static void
3064 vnet_unbind_rings(vnet_res_t *vresp)
3065 {
3066 	switch (vresp->type) {
3067 
3068 	case VIO_NET_RES_LDC_SERVICE:
3069 	case VIO_NET_RES_LDC_GUEST:
3070 		vnet_unbind_vgenring(vresp);
3071 		break;
3072 
3073 	case VIO_NET_RES_HYBRID:
3074 		vnet_unbind_hwrings(vresp->vnetp);
3075 		break;
3076 
3077 	default:
3078 		break;
3079 
3080 	}
3081 }
3082 
3083 static int
3084 vnet_bind_rings(vnet_res_t *vresp)
3085 {
3086 	int	rv;
3087 
3088 	switch (vresp->type) {
3089 
3090 	case VIO_NET_RES_LDC_SERVICE:
3091 	case VIO_NET_RES_LDC_GUEST:
3092 		rv = vnet_bind_vgenring(vresp);
3093 		break;
3094 
3095 	case VIO_NET_RES_HYBRID:
3096 		rv = vnet_bind_hwrings(vresp->vnetp);
3097 		break;
3098 
3099 	default:
3100 		rv = 1;
3101 		break;
3102 
3103 	}
3104 
3105 	return (rv);
3106 }
3107 
3108 /* ARGSUSED */
3109 int
3110 vnet_hio_stat(void *arg, uint_t stat, uint64_t *val)
3111 {
3112 	vnet_t	*vnetp = (vnet_t *)arg;
3113 
3114 	*val = mac_stat_get(vnetp->hio_mh, stat);
3115 	return (0);
3116 }
3117 
/*
 * Dummy start() routine for the Hybrid resource; it always succeeds.
 *
 * It is provided, together with the matching dummy stop() routine, so that
 * vnet_start_resources() and vnet_stop_resources() need no resource type
 * specific code. The Hybrid resource is really started and stopped through
 * the mac_client interfaces invoked in vnet_hio_mac_init() and
 * vnet_hio_mac_cleanup().
 */
/* ARGSUSED */
static int
vnet_hio_start(void *arg)
{
	/* Nothing to do here; see the comment above */
	return (0);
}
3131 
/*
 * Dummy stop() routine for the Hybrid resource; intentionally empty.
 */
/* ARGSUSED */
static void
vnet_hio_stop(void *arg)
{
	/* Nothing to do */
}
3137 
3138 mblk_t *
3139 vnet_hio_tx(void *arg, mblk_t *mp)
3140 {
3141 	vnet_pseudo_tx_ring_t	*tx_ringp;
3142 	mblk_t			*nextp;
3143 	mblk_t			*ret_mp;
3144 
3145 	tx_ringp = (vnet_pseudo_tx_ring_t *)arg;
3146 	for (;;) {
3147 		nextp = mp->b_next;
3148 		mp->b_next = NULL;
3149 
3150 		ret_mp = mac_hwring_tx(tx_ringp->hw_rh, mp);
3151 		if (ret_mp != NULL) {
3152 			ret_mp->b_next = nextp;
3153 			mp = ret_mp;
3154 			break;
3155 		}
3156 
3157 		if ((mp = nextp) == NULL)
3158 			break;
3159 	}
3160 	return (mp);
3161 }
3162 
3163 #ifdef	VNET_IOC_DEBUG
3164 
3165 /*
3166  * The ioctl entry point is used only for debugging for now. The ioctl commands
3167  * can be used to force the link state of the channel connected to vsw.
3168  */
3169 static void
3170 vnet_m_ioctl(void *arg, queue_t *q, mblk_t *mp)
3171 {
3172 	struct iocblk	*iocp;
3173 	vnet_t		*vnetp;
3174 
3175 	iocp = (struct iocblk *)(uintptr_t)mp->b_rptr;
3176 	iocp->ioc_error = 0;
3177 	vnetp = (vnet_t *)arg;
3178 
3179 	if (vnetp == NULL) {
3180 		miocnak(q, mp, 0, EINVAL);
3181 		return;
3182 	}
3183 
3184 	switch (iocp->ioc_cmd) {
3185 
3186 	case VNET_FORCE_LINK_DOWN:
3187 	case VNET_FORCE_LINK_UP:
3188 		vnet_force_link_state(vnetp, q, mp);
3189 		break;
3190 
3191 	default:
3192 		iocp->ioc_error = EINVAL;
3193 		miocnak(q, mp, 0, iocp->ioc_error);
3194 		break;
3195 
3196 	}
3197 }
3198 
3199 static void
3200 vnet_force_link_state(vnet_t *vnetp, queue_t *q, mblk_t *mp)
3201 {
3202 	mac_register_t	*macp;
3203 	mac_callbacks_t	*cbp;
3204 	vnet_res_t	*vresp;
3205 
3206 	READ_ENTER(&vnetp->vsw_fp_rw);
3207 
3208 	vresp = vnetp->vsw_fp;
3209 	if (vresp == NULL) {
3210 		RW_EXIT(&vnetp->vsw_fp_rw);
3211 		return;
3212 	}
3213 
3214 	macp = &vresp->macreg;
3215 	cbp = macp->m_callbacks;
3216 	cbp->mc_ioctl(macp->m_driver, q, mp);
3217 
3218 	RW_EXIT(&vnetp->vsw_fp_rw);
3219 }
3220 
3221 #else
3222 
3223 static void
3224 vnet_m_ioctl(void *arg, queue_t *q, mblk_t *mp)
3225 {
3226 	vnet_t		*vnetp;
3227 
3228 	vnetp = (vnet_t *)arg;
3229 
3230 	if (vnetp == NULL) {
3231 		miocnak(q, mp, 0, EINVAL);
3232 		return;
3233 	}
3234 
3235 	/* ioctl support only for debugging */
3236 	miocnak(q, mp, 0, ENOTSUP);
3237 }
3238 
3239 #endif
3240