xref: /titanic_52/usr/src/uts/sun4v/io/vnet.c (revision b695575577bae0337af339d76949713bfe1c9013)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #include <sys/types.h>
28 #include <sys/errno.h>
29 #include <sys/param.h>
30 #include <sys/stream.h>
31 #include <sys/kmem.h>
32 #include <sys/conf.h>
33 #include <sys/devops.h>
34 #include <sys/ksynch.h>
35 #include <sys/stat.h>
36 #include <sys/modctl.h>
37 #include <sys/modhash.h>
38 #include <sys/debug.h>
39 #include <sys/ethernet.h>
40 #include <sys/dlpi.h>
41 #include <net/if.h>
42 #include <sys/mac_provider.h>
43 #include <sys/mac_ether.h>
44 #include <sys/ddi.h>
45 #include <sys/sunddi.h>
46 #include <sys/strsun.h>
47 #include <sys/note.h>
48 #include <sys/atomic.h>
49 #include <sys/vnet.h>
50 #include <sys/vlan.h>
51 #include <sys/vnet_mailbox.h>
52 #include <sys/vnet_common.h>
53 #include <sys/dds.h>
54 #include <sys/strsubr.h>
55 #include <sys/taskq.h>
56 
57 /*
58  * Function prototypes.
59  */
60 
61 /* DDI entrypoints */
62 static int vnetdevinfo(dev_info_t *, ddi_info_cmd_t, void *, void **);
63 static int vnetattach(dev_info_t *, ddi_attach_cmd_t);
64 static int vnetdetach(dev_info_t *, ddi_detach_cmd_t);
65 
66 /* MAC entrypoints  */
67 static int vnet_m_stat(void *, uint_t, uint64_t *);
68 static int vnet_m_start(void *);
69 static void vnet_m_stop(void *);
70 static int vnet_m_promisc(void *, boolean_t);
71 static int vnet_m_multicst(void *, boolean_t, const uint8_t *);
72 static int vnet_m_unicst(void *, const uint8_t *);
73 mblk_t *vnet_m_tx(void *, mblk_t *);
74 static void vnet_m_ioctl(void *arg, queue_t *q, mblk_t *mp);
75 #ifdef	VNET_IOC_DEBUG
76 static void vnet_force_link_state(vnet_t *vnetp, queue_t *q, mblk_t *mp);
77 #endif
78 
79 /* vnet internal functions */
80 static int vnet_unattach(vnet_t *vnetp);
81 static int vnet_mac_register(vnet_t *);
82 static int vnet_read_mac_address(vnet_t *vnetp);
83 
84 /* Forwarding database (FDB) routines */
85 static void vnet_fdb_create(vnet_t *vnetp);
86 static void vnet_fdb_destroy(vnet_t *vnetp);
87 static vnet_res_t *vnet_fdbe_find(vnet_t *vnetp, struct ether_addr *addrp);
88 static void vnet_fdbe_find_cb(mod_hash_key_t key, mod_hash_val_t val);
89 void vnet_fdbe_add(vnet_t *vnetp, vnet_res_t *vresp);
90 static void vnet_fdbe_del(vnet_t *vnetp, vnet_res_t *vresp);
91 
92 static void vnet_rx_frames_untag(uint16_t pvid, mblk_t **mp);
93 static void vnet_rx(vio_net_handle_t vrh, mblk_t *mp);
94 static void vnet_tx_update(vio_net_handle_t vrh);
95 static void vnet_res_start_task(void *arg);
96 static void vnet_start_resources(vnet_t *vnetp);
97 static void vnet_stop_resources(vnet_t *vnetp);
98 static void vnet_dispatch_res_task(vnet_t *vnetp);
99 static void vnet_res_start_task(void *arg);
100 static void vnet_handle_res_err(vio_net_handle_t vrh, vio_net_err_val_t err);
101 
102 /* Exported to vnet_gen */
103 int vnet_mtu_update(vnet_t *vnetp, uint32_t mtu);
104 void vnet_link_update(vnet_t *vnetp, link_state_t link_state);
105 
106 static kstat_t *vnet_hio_setup_kstats(char *ks_mod, char *ks_name,
107     vnet_res_t *vresp);
108 static int vnet_hio_update_kstats(kstat_t *ksp, int rw);
109 static void vnet_hio_get_stats(vnet_res_t *vresp, vnet_hio_stats_t *statsp);
110 static void vnet_hio_destroy_kstats(kstat_t *ksp);
111 
/* Exported to vnet_dds */
113 int vnet_send_dds_msg(vnet_t *vnetp, void *dmsg);
114 
115 /* Externs that are imported from vnet_gen */
116 extern int vgen_init(void *vnetp, uint64_t regprop, dev_info_t *vnetdip,
117     const uint8_t *macaddr, void **vgenhdl);
118 extern void vgen_uninit(void *arg);
119 extern int vgen_dds_tx(void *arg, void *dmsg);
120 extern void vgen_mod_init(void);
121 extern int vgen_mod_cleanup(void);
122 extern void vgen_mod_fini(void);
123 
124 /* Externs that are imported from vnet_dds */
125 extern void vdds_mod_init(void);
126 extern void vdds_mod_fini(void);
127 extern int vdds_init(vnet_t *vnetp);
128 extern void vdds_cleanup(vnet_t *vnetp);
129 extern void vdds_process_dds_msg(vnet_t *vnetp, vio_dds_msg_t *dmsg);
130 extern void vdds_cleanup_hybrid_res(void *arg);
131 
#define	DRV_NAME	"vnet"

/*
 * Take a reference on an fdb entry (any object with a 32-bit 'refcnt'
 * member). The increment is atomic; the ASSERT catches the counter wrapping
 * around to zero.
 *
 * Both ref-count macros are wrapped in do { } while (0) so that they expand
 * to a single statement and can be used safely in unbraced if/else bodies.
 */
#define	VNET_FDBE_REFHOLD(p)						\
do {									\
	atomic_inc_32(&(p)->refcnt);					\
	ASSERT((p)->refcnt != 0);					\
} while (0)

/*
 * Drop a reference previously taken with VNET_FDBE_REFHOLD(). The ASSERT
 * catches a release without a matching hold.
 */
#define	VNET_FDBE_REFRELE(p)						\
do {									\
	ASSERT((p)->refcnt != 0);					\
	atomic_dec_32(&(p)->refcnt);					\
} while (0)
144 
145 #ifdef	VNET_IOC_DEBUG
146 #define	VNET_M_CALLBACK_FLAGS	(MC_IOCTL)
147 #else
148 #define	VNET_M_CALLBACK_FLAGS	(0)
149 #endif
150 
151 static mac_callbacks_t vnet_m_callbacks = {
152 	VNET_M_CALLBACK_FLAGS,
153 	vnet_m_stat,
154 	vnet_m_start,
155 	vnet_m_stop,
156 	vnet_m_promisc,
157 	vnet_m_multicst,
158 	vnet_m_unicst,
159 	vnet_m_tx,
160 	vnet_m_ioctl,
161 	NULL,
162 	NULL
163 };
164 
165 /*
166  * Linked list of "vnet_t" structures - one per instance.
167  */
168 static vnet_t	*vnet_headp = NULL;
169 static krwlock_t vnet_rw;
170 
171 /* Tunables */
172 uint32_t vnet_ntxds = VNET_NTXDS;	/* power of 2 transmit descriptors */
173 uint32_t vnet_ldcwd_interval = VNET_LDCWD_INTERVAL; /* watchdog freq in msec */
174 uint32_t vnet_ldcwd_txtimeout = VNET_LDCWD_TXTIMEOUT;  /* tx timeout in msec */
175 uint32_t vnet_ldc_mtu = VNET_LDC_MTU;		/* ldc mtu */
176 
177 /*
178  * Set this to non-zero to enable additional internal receive buffer pools
179  * based on the MTU of the device for better performance at the cost of more
180  * memory consumption. This is turned off by default, to use allocb(9F) for
181  * receive buffer allocations of sizes > 2K.
182  */
183 boolean_t vnet_jumbo_rxpools = B_FALSE;
184 
185 /* # of chains in fdb hash table */
186 uint32_t	vnet_fdb_nchains = VNET_NFDB_HASH;
187 
188 /* Internal tunables */
189 uint32_t	vnet_ethermtu = 1500;	/* mtu of the device */
190 
191 /*
192  * Default vlan id. This is only used internally when the "default-vlan-id"
193  * property is not present in the MD device node. Therefore, this should not be
194  * used as a tunable; if this value is changed, the corresponding variable
195  * should be updated to the same value in vsw and also other vnets connected to
196  * the same vsw.
197  */
198 uint16_t	vnet_default_vlan_id = 1;
199 
200 /* delay in usec to wait for all references on a fdb entry to be dropped */
201 uint32_t vnet_fdbe_refcnt_delay = 10;
202 
203 static struct ether_addr etherbroadcastaddr = {
204 	0xff, 0xff, 0xff, 0xff, 0xff, 0xff
205 };
206 
207 
/* Name of the device property that holds the mac address. */
static char macaddr_propname[] = "local-mac-address";

/* Module description string, as displayed by modinfo(1M). */
static char vnet_ident[] = "vnet driver";

extern struct mod_ops mod_driverops;
218 static struct cb_ops cb_vnetops = {
219 	nulldev,		/* cb_open */
220 	nulldev,		/* cb_close */
221 	nodev,			/* cb_strategy */
222 	nodev,			/* cb_print */
223 	nodev,			/* cb_dump */
224 	nodev,			/* cb_read */
225 	nodev,			/* cb_write */
226 	nodev,			/* cb_ioctl */
227 	nodev,			/* cb_devmap */
228 	nodev,			/* cb_mmap */
229 	nodev,			/* cb_segmap */
230 	nochpoll,		/* cb_chpoll */
231 	ddi_prop_op,		/* cb_prop_op */
232 	NULL,			/* cb_stream */
233 	(int)(D_MP)		/* cb_flag */
234 };
235 
236 static struct dev_ops vnetops = {
237 	DEVO_REV,		/* devo_rev */
238 	0,			/* devo_refcnt */
239 	NULL,			/* devo_getinfo */
240 	nulldev,		/* devo_identify */
241 	nulldev,		/* devo_probe */
242 	vnetattach,		/* devo_attach */
243 	vnetdetach,		/* devo_detach */
244 	nodev,			/* devo_reset */
245 	&cb_vnetops,		/* devo_cb_ops */
246 	(struct bus_ops *)NULL,	/* devo_bus_ops */
247 	NULL,			/* devo_power */
248 	ddi_quiesce_not_supported,	/* devo_quiesce */
249 };
250 
251 static struct modldrv modldrv = {
252 	&mod_driverops,		/* Type of module.  This one is a driver */
253 	vnet_ident,		/* ID string */
254 	&vnetops		/* driver specific ops */
255 };
256 
257 static struct modlinkage modlinkage = {
258 	MODREV_1, (void *)&modldrv, NULL
259 };
260 
#ifdef DEBUG

/*
 * Print debug messages - set to 0xf to enable all msgs
 */
int vnet_dbglevel = 0x8;

/*
 * Format and print one debug message through cmn_err(CE_CONT).
 *
 * Arguments:
 *    fname: name of the calling function, used as message prefix
 *    arg:   vnet_t of the instance, or NULL if no instance is known; when
 *           non-NULL the prefix also carries "vnet<instance>:"
 *    fmt:   printf-style format, followed by its arguments
 *
 * The message is assembled in a fixed-size stack buffer; both the prefix and
 * the formatted text are bounded by the buffer size, so an over-long message
 * is truncated instead of overrunning the stack.
 */
static void
debug_printf(const char *fname, void *arg, const char *fmt, ...)
{
	char	buf[512];
	va_list	ap;
	vnet_t	*vnetp = (vnet_t *)arg;
	size_t	len;

	if (vnetp == NULL) {
		(void) snprintf(buf, sizeof (buf), "%s: ", fname);
	} else {
		(void) snprintf(buf, sizeof (buf), "vnet%d:%s: ",
		    vnetp->instance, fname);
	}

	/* snprintf() always terminates buf, so len < sizeof (buf) */
	len = strlen(buf);

	va_start(ap, fmt);
	(void) vsnprintf(buf + len, sizeof (buf) - len, fmt, ap);
	va_end(ap);
	cmn_err(CE_CONT, "%s\n", buf);
}

#endif
290 
291 /* _init(9E): initialize the loadable module */
292 int
293 _init(void)
294 {
295 	int status;
296 
297 	DBG1(NULL, "enter\n");
298 
299 	mac_init_ops(&vnetops, "vnet");
300 	status = mod_install(&modlinkage);
301 	if (status != 0) {
302 		mac_fini_ops(&vnetops);
303 	}
304 	vdds_mod_init();
305 	vgen_mod_init();
306 	DBG1(NULL, "exit(%d)\n", status);
307 	return (status);
308 }
309 
310 /* _fini(9E): prepare the module for unloading. */
311 int
312 _fini(void)
313 {
314 	int		status;
315 
316 	DBG1(NULL, "enter\n");
317 
318 	status = vgen_mod_cleanup();
319 	if (status != 0)
320 		return (status);
321 
322 	status = mod_remove(&modlinkage);
323 	if (status != 0)
324 		return (status);
325 	mac_fini_ops(&vnetops);
326 	vgen_mod_fini();
327 	vdds_mod_fini();
328 
329 	DBG1(NULL, "exit(%d)\n", status);
330 	return (status);
331 }
332 
333 /* _info(9E): return information about the loadable module */
334 int
335 _info(struct modinfo *modinfop)
336 {
337 	return (mod_info(&modlinkage, modinfop));
338 }
339 
340 /*
341  * attach(9E): attach a device to the system.
342  * called once for each instance of the device on the system.
343  */
344 static int
345 vnetattach(dev_info_t *dip, ddi_attach_cmd_t cmd)
346 {
347 	vnet_t			*vnetp;
348 	int			status;
349 	int			instance;
350 	uint64_t		reg;
351 	char			qname[TASKQ_NAMELEN];
352 	vnet_attach_progress_t	attach_progress;
353 
354 	attach_progress = AST_init;
355 
356 	switch (cmd) {
357 	case DDI_ATTACH:
358 		break;
359 	case DDI_RESUME:
360 	case DDI_PM_RESUME:
361 	default:
362 		goto vnet_attach_fail;
363 	}
364 
365 	instance = ddi_get_instance(dip);
366 	DBG1(NULL, "instance(%d) enter\n", instance);
367 
368 	/* allocate vnet_t and mac_t structures */
369 	vnetp = kmem_zalloc(sizeof (vnet_t), KM_SLEEP);
370 	vnetp->dip = dip;
371 	vnetp->instance = instance;
372 	rw_init(&vnetp->vrwlock, NULL, RW_DRIVER, NULL);
373 	rw_init(&vnetp->vsw_fp_rw, NULL, RW_DRIVER, NULL);
374 	attach_progress |= AST_vnet_alloc;
375 
376 	status = vdds_init(vnetp);
377 	if (status != 0) {
378 		goto vnet_attach_fail;
379 	}
380 	attach_progress |= AST_vdds_init;
381 
382 	/* setup links to vnet_t from both devinfo and mac_t */
383 	ddi_set_driver_private(dip, (caddr_t)vnetp);
384 
385 	/* read the mac address */
386 	status = vnet_read_mac_address(vnetp);
387 	if (status != DDI_SUCCESS) {
388 		goto vnet_attach_fail;
389 	}
390 	attach_progress |= AST_read_macaddr;
391 
392 	reg = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
393 	    DDI_PROP_DONTPASS, "reg", -1);
394 	if (reg == -1) {
395 		goto vnet_attach_fail;
396 	}
397 	vnetp->reg = reg;
398 
399 	vnet_fdb_create(vnetp);
400 	attach_progress |= AST_fdbh_alloc;
401 
402 	(void) snprintf(qname, TASKQ_NAMELEN, "vnet_taskq%d", instance);
403 	if ((vnetp->taskqp = ddi_taskq_create(dip, qname, 1,
404 	    TASKQ_DEFAULTPRI, 0)) == NULL) {
405 		cmn_err(CE_WARN, "!vnet%d: Unable to create task queue",
406 		    instance);
407 		goto vnet_attach_fail;
408 	}
409 	attach_progress |= AST_taskq_create;
410 
411 	/* add to the list of vnet devices */
412 	WRITE_ENTER(&vnet_rw);
413 	vnetp->nextp = vnet_headp;
414 	vnet_headp = vnetp;
415 	RW_EXIT(&vnet_rw);
416 
417 	attach_progress |= AST_vnet_list;
418 
419 	/*
420 	 * Initialize the generic vnet plugin which provides
421 	 * communication via sun4v LDC (logical domain channel) based
422 	 * resources. It will register the LDC resources as and when
423 	 * they become available.
424 	 */
425 	status = vgen_init(vnetp, reg, vnetp->dip,
426 	    (uint8_t *)vnetp->curr_macaddr, &vnetp->vgenhdl);
427 	if (status != DDI_SUCCESS) {
428 		DERR(vnetp, "vgen_init() failed\n");
429 		goto vnet_attach_fail;
430 	}
431 	attach_progress |= AST_vgen_init;
432 
433 	/* register with MAC layer */
434 	status = vnet_mac_register(vnetp);
435 	if (status != DDI_SUCCESS) {
436 		goto vnet_attach_fail;
437 	}
438 	vnetp->link_state = LINK_STATE_UNKNOWN;
439 
440 	attach_progress |= AST_macreg;
441 
442 	vnetp->attach_progress = attach_progress;
443 
444 	DBG1(NULL, "instance(%d) exit\n", instance);
445 	return (DDI_SUCCESS);
446 
447 vnet_attach_fail:
448 	vnetp->attach_progress = attach_progress;
449 	status = vnet_unattach(vnetp);
450 	ASSERT(status == 0);
451 	return (DDI_FAILURE);
452 }
453 
454 /*
455  * detach(9E): detach a device from the system.
456  */
457 static int
458 vnetdetach(dev_info_t *dip, ddi_detach_cmd_t cmd)
459 {
460 	vnet_t		*vnetp;
461 	int		instance;
462 
463 	instance = ddi_get_instance(dip);
464 	DBG1(NULL, "instance(%d) enter\n", instance);
465 
466 	vnetp = ddi_get_driver_private(dip);
467 	if (vnetp == NULL) {
468 		goto vnet_detach_fail;
469 	}
470 
471 	switch (cmd) {
472 	case DDI_DETACH:
473 		break;
474 	case DDI_SUSPEND:
475 	case DDI_PM_SUSPEND:
476 	default:
477 		goto vnet_detach_fail;
478 	}
479 
480 	if (vnet_unattach(vnetp) != 0) {
481 		goto vnet_detach_fail;
482 	}
483 
484 	return (DDI_SUCCESS);
485 
486 vnet_detach_fail:
487 	return (DDI_FAILURE);
488 }
489 
490 /*
491  * Common routine to handle vnetattach() failure and vnetdetach(). Note that
492  * the only reason this function could fail is if mac_unregister() fails.
493  * Otherwise, this function must ensure that all resources are freed and return
494  * success.
495  */
496 static int
497 vnet_unattach(vnet_t *vnetp)
498 {
499 	vnet_attach_progress_t	attach_progress;
500 
501 	attach_progress = vnetp->attach_progress;
502 
503 	/*
504 	 * Unregister from the gldv3 subsystem. This can fail, in particular
505 	 * if there are still any open references to this mac device; in which
506 	 * case we just return failure without continuing to detach further.
507 	 */
508 	if (attach_progress & AST_macreg) {
509 		if (mac_unregister(vnetp->mh) != 0) {
510 			return (1);
511 		}
512 		attach_progress &= ~AST_macreg;
513 	}
514 
515 	/*
516 	 * Now that we have unregistered from gldv3, we must finish all other
517 	 * steps and successfully return from this function; otherwise we will
518 	 * end up leaving the device in a broken/unusable state.
519 	 *
520 	 * First, release any hybrid resources assigned to this vnet device.
521 	 */
522 	if (attach_progress & AST_vdds_init) {
523 		vdds_cleanup(vnetp);
524 		attach_progress &= ~AST_vdds_init;
525 	}
526 
527 	/*
528 	 * Uninit vgen. This stops further mdeg callbacks to this vnet
529 	 * device and/or its ports; and detaches any existing ports.
530 	 */
531 	if (attach_progress & AST_vgen_init) {
532 		vgen_uninit(vnetp->vgenhdl);
533 		attach_progress &= ~AST_vgen_init;
534 	}
535 
536 	/* Destroy the taskq. */
537 	if (attach_progress & AST_taskq_create) {
538 		ddi_taskq_destroy(vnetp->taskqp);
539 		attach_progress &= ~AST_taskq_create;
540 	}
541 
542 	/* Destroy fdb. */
543 	if (attach_progress & AST_fdbh_alloc) {
544 		vnet_fdb_destroy(vnetp);
545 		attach_progress &= ~AST_fdbh_alloc;
546 	}
547 
548 	/* Remove from the device list */
549 	if (attach_progress & AST_vnet_list) {
550 		vnet_t		**vnetpp;
551 		/* unlink from instance(vnet_t) list */
552 		WRITE_ENTER(&vnet_rw);
553 		for (vnetpp = &vnet_headp; *vnetpp;
554 		    vnetpp = &(*vnetpp)->nextp) {
555 			if (*vnetpp == vnetp) {
556 				*vnetpp = vnetp->nextp;
557 				break;
558 			}
559 		}
560 		RW_EXIT(&vnet_rw);
561 		attach_progress &= ~AST_vnet_list;
562 	}
563 
564 	if (attach_progress & AST_vnet_alloc) {
565 		rw_destroy(&vnetp->vrwlock);
566 		rw_destroy(&vnetp->vsw_fp_rw);
567 		attach_progress &= ~AST_vnet_list;
568 		KMEM_FREE(vnetp);
569 	}
570 
571 	return (0);
572 }
573 
574 /* enable the device for transmit/receive */
575 static int
576 vnet_m_start(void *arg)
577 {
578 	vnet_t		*vnetp = arg;
579 
580 	DBG1(vnetp, "enter\n");
581 
582 	WRITE_ENTER(&vnetp->vrwlock);
583 	vnetp->flags |= VNET_STARTED;
584 	vnet_start_resources(vnetp);
585 	RW_EXIT(&vnetp->vrwlock);
586 
587 	DBG1(vnetp, "exit\n");
588 	return (VNET_SUCCESS);
589 
590 }
591 
592 /* stop transmit/receive for the device */
593 static void
594 vnet_m_stop(void *arg)
595 {
596 	vnet_t		*vnetp = arg;
597 
598 	DBG1(vnetp, "enter\n");
599 
600 	WRITE_ENTER(&vnetp->vrwlock);
601 	if (vnetp->flags & VNET_STARTED) {
602 		vnet_stop_resources(vnetp);
603 		vnetp->flags &= ~VNET_STARTED;
604 	}
605 	RW_EXIT(&vnetp->vrwlock);
606 
607 	DBG1(vnetp, "exit\n");
608 }
609 
610 /* set the unicast mac address of the device */
611 static int
612 vnet_m_unicst(void *arg, const uint8_t *macaddr)
613 {
614 	_NOTE(ARGUNUSED(macaddr))
615 
616 	vnet_t *vnetp = arg;
617 
618 	DBG1(vnetp, "enter\n");
619 	/*
620 	 * NOTE: setting mac address dynamically is not supported.
621 	 */
622 	DBG1(vnetp, "exit\n");
623 
624 	return (VNET_FAILURE);
625 }
626 
627 /* enable/disable a multicast address */
628 static int
629 vnet_m_multicst(void *arg, boolean_t add, const uint8_t *mca)
630 {
631 	_NOTE(ARGUNUSED(add, mca))
632 
633 	vnet_t *vnetp = arg;
634 	vnet_res_t	*vresp;
635 	mac_register_t	*macp;
636 	mac_callbacks_t	*cbp;
637 	int rv = VNET_SUCCESS;
638 
639 	DBG1(vnetp, "enter\n");
640 
641 	READ_ENTER(&vnetp->vrwlock);
642 	for (vresp = vnetp->vres_list; vresp != NULL; vresp = vresp->nextp) {
643 		if (vresp->type == VIO_NET_RES_LDC_SERVICE) {
644 			macp = &vresp->macreg;
645 			cbp = macp->m_callbacks;
646 			rv = cbp->mc_multicst(macp->m_driver, add, mca);
647 		}
648 	}
649 	RW_EXIT(&vnetp->vrwlock);
650 
651 	DBG1(vnetp, "exit(%d)\n", rv);
652 	return (rv);
653 }
654 
655 /* set or clear promiscuous mode on the device */
656 static int
657 vnet_m_promisc(void *arg, boolean_t on)
658 {
659 	_NOTE(ARGUNUSED(on))
660 
661 	vnet_t *vnetp = arg;
662 	DBG1(vnetp, "enter\n");
663 	/*
664 	 * NOTE: setting promiscuous mode is not supported, just return success.
665 	 */
666 	DBG1(vnetp, "exit\n");
667 	return (VNET_SUCCESS);
668 }
669 
670 /*
671  * Transmit a chain of packets. This function provides switching functionality
672  * based on the destination mac address to reach other guests (within ldoms) or
673  * external hosts.
674  */
675 mblk_t *
676 vnet_m_tx(void *arg, mblk_t *mp)
677 {
678 	vnet_t			*vnetp;
679 	vnet_res_t		*vresp;
680 	mblk_t			*next;
681 	mblk_t			*resid_mp;
682 	mac_register_t		*macp;
683 	struct ether_header	*ehp;
684 	boolean_t		is_unicast;
685 	boolean_t		is_pvid;	/* non-default pvid ? */
686 	boolean_t		hres;		/* Hybrid resource ? */
687 
688 	vnetp = (vnet_t *)arg;
689 	DBG1(vnetp, "enter\n");
690 	ASSERT(mp != NULL);
691 
692 	is_pvid = (vnetp->pvid != vnetp->default_vlan_id) ? B_TRUE : B_FALSE;
693 
694 	while (mp != NULL) {
695 
696 		next = mp->b_next;
697 		mp->b_next = NULL;
698 
699 		/*
700 		 * Find fdb entry for the destination
701 		 * and hold a reference to it.
702 		 */
703 		ehp = (struct ether_header *)mp->b_rptr;
704 		vresp = vnet_fdbe_find(vnetp, &ehp->ether_dhost);
705 		if (vresp != NULL) {
706 
707 			/*
708 			 * Destination found in FDB.
709 			 * The destination is a vnet device within ldoms
710 			 * and directly reachable, invoke the tx function
711 			 * in the fdb entry.
712 			 */
713 			macp = &vresp->macreg;
714 			resid_mp = macp->m_callbacks->mc_tx(macp->m_driver, mp);
715 
716 			/* tx done; now release ref on fdb entry */
717 			VNET_FDBE_REFRELE(vresp);
718 
719 			if (resid_mp != NULL) {
720 				/* m_tx failed */
721 				mp->b_next = next;
722 				break;
723 			}
724 		} else {
725 			is_unicast = !(IS_BROADCAST(ehp) ||
726 			    (IS_MULTICAST(ehp)));
727 			/*
728 			 * Destination is not in FDB.
729 			 * If the destination is broadcast or multicast,
730 			 * then forward the packet to vswitch.
731 			 * If a Hybrid resource avilable, then send the
732 			 * unicast packet via hybrid resource, otherwise
733 			 * forward it to vswitch.
734 			 */
735 			READ_ENTER(&vnetp->vsw_fp_rw);
736 
737 			if ((is_unicast) && (vnetp->hio_fp != NULL)) {
738 				vresp = vnetp->hio_fp;
739 				hres = B_TRUE;
740 			} else {
741 				vresp = vnetp->vsw_fp;
742 				hres = B_FALSE;
743 			}
744 			if (vresp == NULL) {
745 				/*
746 				 * no fdb entry to vsw? drop the packet.
747 				 */
748 				RW_EXIT(&vnetp->vsw_fp_rw);
749 				freemsg(mp);
750 				mp = next;
751 				continue;
752 			}
753 
754 			/* ref hold the fdb entry to vsw */
755 			VNET_FDBE_REFHOLD(vresp);
756 
757 			RW_EXIT(&vnetp->vsw_fp_rw);
758 
759 			/*
760 			 * In the case of a hybrid resource we need to insert
761 			 * the tag for the pvid case here; unlike packets that
762 			 * are destined to a vnet/vsw in which case the vgen
763 			 * layer does the tagging before sending it over ldc.
764 			 */
765 			if (hres == B_TRUE) {
766 				/*
767 				 * Determine if the frame being transmitted
768 				 * over the hybrid resource is untagged. If so,
769 				 * insert the tag before transmitting.
770 				 */
771 				if (is_pvid == B_TRUE &&
772 				    ehp->ether_type != htons(ETHERTYPE_VLAN)) {
773 
774 					mp = vnet_vlan_insert_tag(mp,
775 					    vnetp->pvid);
776 					if (mp == NULL) {
777 						VNET_FDBE_REFRELE(vresp);
778 						mp = next;
779 						continue;
780 					}
781 
782 				}
783 			}
784 
785 			macp = &vresp->macreg;
786 			resid_mp = macp->m_callbacks->mc_tx(macp->m_driver, mp);
787 
788 			/* tx done; now release ref on fdb entry */
789 			VNET_FDBE_REFRELE(vresp);
790 
791 			if (resid_mp != NULL) {
792 				/* m_tx failed */
793 				mp->b_next = next;
794 				break;
795 			}
796 		}
797 
798 		mp = next;
799 	}
800 
801 	DBG1(vnetp, "exit\n");
802 	return (mp);
803 }
804 
805 /* get statistics from the device */
806 int
807 vnet_m_stat(void *arg, uint_t stat, uint64_t *val)
808 {
809 	vnet_t *vnetp = arg;
810 	vnet_res_t	*vresp;
811 	mac_register_t	*macp;
812 	mac_callbacks_t	*cbp;
813 	uint64_t val_total = 0;
814 
815 	DBG1(vnetp, "enter\n");
816 
817 	/*
818 	 * get the specified statistic from each transport and return the
819 	 * aggregate val.  This obviously only works for counters.
820 	 */
821 	if ((IS_MAC_STAT(stat) && !MAC_STAT_ISACOUNTER(stat)) ||
822 	    (IS_MACTYPE_STAT(stat) && !ETHER_STAT_ISACOUNTER(stat))) {
823 		return (ENOTSUP);
824 	}
825 
826 	READ_ENTER(&vnetp->vrwlock);
827 	for (vresp = vnetp->vres_list; vresp != NULL; vresp = vresp->nextp) {
828 		macp = &vresp->macreg;
829 		cbp = macp->m_callbacks;
830 		if (cbp->mc_getstat(macp->m_driver, stat, val) == 0)
831 			val_total += *val;
832 	}
833 	RW_EXIT(&vnetp->vrwlock);
834 
835 	*val = val_total;
836 
837 	DBG1(vnetp, "exit\n");
838 	return (0);
839 }
840 
841 /* wrapper function for mac_register() */
842 static int
843 vnet_mac_register(vnet_t *vnetp)
844 {
845 	mac_register_t	*macp;
846 	int		err;
847 
848 	if ((macp = mac_alloc(MAC_VERSION)) == NULL)
849 		return (DDI_FAILURE);
850 	macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
851 	macp->m_driver = vnetp;
852 	macp->m_dip = vnetp->dip;
853 	macp->m_src_addr = vnetp->curr_macaddr;
854 	macp->m_callbacks = &vnet_m_callbacks;
855 	macp->m_min_sdu = 0;
856 	macp->m_max_sdu = vnetp->mtu;
857 	macp->m_margin = VLAN_TAGSZ;
858 
859 	/*
860 	 * Finally, we're ready to register ourselves with the MAC layer
861 	 * interface; if this succeeds, we're all ready to start()
862 	 */
863 	err = mac_register(macp, &vnetp->mh);
864 	mac_free(macp);
865 	return (err == 0 ? DDI_SUCCESS : DDI_FAILURE);
866 }
867 
868 /* read the mac address of the device */
869 static int
870 vnet_read_mac_address(vnet_t *vnetp)
871 {
872 	uchar_t 	*macaddr;
873 	uint32_t 	size;
874 	int 		rv;
875 
876 	rv = ddi_prop_lookup_byte_array(DDI_DEV_T_ANY, vnetp->dip,
877 	    DDI_PROP_DONTPASS, macaddr_propname, &macaddr, &size);
878 	if ((rv != DDI_PROP_SUCCESS) || (size != ETHERADDRL)) {
879 		DWARN(vnetp, "prop_lookup failed(%s) err(%d)\n",
880 		    macaddr_propname, rv);
881 		return (DDI_FAILURE);
882 	}
883 	bcopy(macaddr, (caddr_t)vnetp->vendor_addr, ETHERADDRL);
884 	bcopy(macaddr, (caddr_t)vnetp->curr_macaddr, ETHERADDRL);
885 	ddi_prop_free(macaddr);
886 
887 	return (DDI_SUCCESS);
888 }
889 
890 static void
891 vnet_fdb_create(vnet_t *vnetp)
892 {
893 	char		hashname[MAXNAMELEN];
894 
895 	(void) snprintf(hashname, MAXNAMELEN, "vnet%d-fdbhash",
896 	    vnetp->instance);
897 	vnetp->fdb_nchains = vnet_fdb_nchains;
898 	vnetp->fdb_hashp = mod_hash_create_ptrhash(hashname, vnetp->fdb_nchains,
899 	    mod_hash_null_valdtor, sizeof (void *));
900 }
901 
902 static void
903 vnet_fdb_destroy(vnet_t *vnetp)
904 {
905 	/* destroy fdb-hash-table */
906 	if (vnetp->fdb_hashp != NULL) {
907 		mod_hash_destroy_hash(vnetp->fdb_hashp);
908 		vnetp->fdb_hashp = NULL;
909 		vnetp->fdb_nchains = 0;
910 	}
911 }
912 
913 /*
914  * Add an entry into the fdb.
915  */
916 void
917 vnet_fdbe_add(vnet_t *vnetp, vnet_res_t *vresp)
918 {
919 	uint64_t	addr = 0;
920 	int		rv;
921 
922 	KEY_HASH(addr, vresp->rem_macaddr);
923 
924 	/*
925 	 * If the entry being added corresponds to LDC_SERVICE resource,
926 	 * that is, vswitch connection, it is added to the hash and also
927 	 * the entry is cached, an additional reference count reflects
928 	 * this. The HYBRID resource is not added to the hash, but only
929 	 * cached, as it is only used for sending out packets for unknown
930 	 * unicast destinations.
931 	 */
932 	(vresp->type == VIO_NET_RES_LDC_SERVICE) ?
933 	    (vresp->refcnt = 1) : (vresp->refcnt = 0);
934 
935 	/*
936 	 * Note: duplicate keys will be rejected by mod_hash.
937 	 */
938 	if (vresp->type != VIO_NET_RES_HYBRID) {
939 		rv = mod_hash_insert(vnetp->fdb_hashp, (mod_hash_key_t)addr,
940 		    (mod_hash_val_t)vresp);
941 		if (rv != 0) {
942 			DWARN(vnetp, "Duplicate macaddr key(%lx)\n", addr);
943 			return;
944 		}
945 	}
946 
947 	if (vresp->type == VIO_NET_RES_LDC_SERVICE) {
948 		/* Cache the fdb entry to vsw-port */
949 		WRITE_ENTER(&vnetp->vsw_fp_rw);
950 		if (vnetp->vsw_fp == NULL)
951 			vnetp->vsw_fp = vresp;
952 		RW_EXIT(&vnetp->vsw_fp_rw);
953 	} else if (vresp->type == VIO_NET_RES_HYBRID) {
954 		/* Cache the fdb entry to hybrid resource */
955 		WRITE_ENTER(&vnetp->vsw_fp_rw);
956 		if (vnetp->hio_fp == NULL)
957 			vnetp->hio_fp = vresp;
958 		RW_EXIT(&vnetp->vsw_fp_rw);
959 	}
960 }
961 
962 /*
963  * Remove an entry from fdb.
964  */
965 static void
966 vnet_fdbe_del(vnet_t *vnetp, vnet_res_t *vresp)
967 {
968 	uint64_t	addr = 0;
969 	int		rv;
970 	uint32_t	refcnt;
971 	vnet_res_t	*tmp;
972 
973 	KEY_HASH(addr, vresp->rem_macaddr);
974 
975 	/*
976 	 * Remove the entry from fdb hash table.
977 	 * This prevents further references to this fdb entry.
978 	 */
979 	if (vresp->type != VIO_NET_RES_HYBRID) {
980 		rv = mod_hash_remove(vnetp->fdb_hashp, (mod_hash_key_t)addr,
981 		    (mod_hash_val_t *)&tmp);
982 		if (rv != 0) {
983 			/*
984 			 * As the resources are added to the hash only
985 			 * after they are started, this can occur if
986 			 * a resource unregisters before it is ever started.
987 			 */
988 			return;
989 		}
990 	}
991 
992 	if (vresp->type == VIO_NET_RES_LDC_SERVICE) {
993 		WRITE_ENTER(&vnetp->vsw_fp_rw);
994 
995 		ASSERT(tmp == vnetp->vsw_fp);
996 		vnetp->vsw_fp = NULL;
997 
998 		RW_EXIT(&vnetp->vsw_fp_rw);
999 	} else if (vresp->type == VIO_NET_RES_HYBRID) {
1000 		WRITE_ENTER(&vnetp->vsw_fp_rw);
1001 
1002 		vnetp->hio_fp = NULL;
1003 
1004 		RW_EXIT(&vnetp->vsw_fp_rw);
1005 	}
1006 
1007 	/*
1008 	 * If there are threads already ref holding before the entry was
1009 	 * removed from hash table, then wait for ref count to drop to zero.
1010 	 */
1011 	(vresp->type == VIO_NET_RES_LDC_SERVICE) ?
1012 	    (refcnt = 1) : (refcnt = 0);
1013 	while (vresp->refcnt > refcnt) {
1014 		delay(drv_usectohz(vnet_fdbe_refcnt_delay));
1015 	}
1016 }
1017 
1018 /*
1019  * Search fdb for a given mac address. If an entry is found, hold
1020  * a reference to it and return the entry; else returns NULL.
1021  */
1022 static vnet_res_t *
1023 vnet_fdbe_find(vnet_t *vnetp, struct ether_addr *addrp)
1024 {
1025 	uint64_t	key = 0;
1026 	vnet_res_t	*vresp;
1027 	int		rv;
1028 
1029 	KEY_HASH(key, addrp->ether_addr_octet);
1030 
1031 	rv = mod_hash_find_cb(vnetp->fdb_hashp, (mod_hash_key_t)key,
1032 	    (mod_hash_val_t *)&vresp, vnet_fdbe_find_cb);
1033 
1034 	if (rv != 0)
1035 		return (NULL);
1036 
1037 	return (vresp);
1038 }
1039 
1040 /*
1041  * Callback function provided to mod_hash_find_cb(). After finding the fdb
1042  * entry corresponding to the key (macaddr), this callback will be invoked by
1043  * mod_hash_find_cb() to atomically increment the reference count on the fdb
1044  * entry before returning the found entry.
1045  */
1046 static void
1047 vnet_fdbe_find_cb(mod_hash_key_t key, mod_hash_val_t val)
1048 {
1049 	_NOTE(ARGUNUSED(key))
1050 	VNET_FDBE_REFHOLD((vnet_res_t *)val);
1051 }
1052 
1053 /*
1054  * Frames received that are tagged with the pvid of the vnet device must be
1055  * untagged before sending up the stack. This function walks the chain of rx
1056  * frames, untags any such frames and returns the updated chain.
1057  *
1058  * Arguments:
1059  *    pvid:  pvid of the vnet device for which packets are being received
1060  *    mp:    head of pkt chain to be validated and untagged
1061  *
1062  * Returns:
1063  *    mp:    head of updated chain of packets
1064  */
1065 static void
1066 vnet_rx_frames_untag(uint16_t pvid, mblk_t **mp)
1067 {
1068 	struct ether_vlan_header	*evhp;
1069 	mblk_t				*bp;
1070 	mblk_t				*bpt;
1071 	mblk_t				*bph;
1072 	mblk_t				*bpn;
1073 
1074 	bpn = bph = bpt = NULL;
1075 
1076 	for (bp = *mp; bp != NULL; bp = bpn) {
1077 
1078 		bpn = bp->b_next;
1079 		bp->b_next = bp->b_prev = NULL;
1080 
1081 		evhp = (struct ether_vlan_header *)bp->b_rptr;
1082 
1083 		if (ntohs(evhp->ether_tpid) == ETHERTYPE_VLAN &&
1084 		    VLAN_ID(ntohs(evhp->ether_tci)) == pvid) {
1085 
1086 			bp = vnet_vlan_remove_tag(bp);
1087 			if (bp == NULL) {
1088 				continue;
1089 			}
1090 
1091 		}
1092 
1093 		/* build a chain of processed packets */
1094 		if (bph == NULL) {
1095 			bph = bpt = bp;
1096 		} else {
1097 			bpt->b_next = bp;
1098 			bpt = bp;
1099 		}
1100 
1101 	}
1102 
1103 	*mp = bph;
1104 }
1105 
1106 static void
1107 vnet_rx(vio_net_handle_t vrh, mblk_t *mp)
1108 {
1109 	vnet_res_t	*vresp = (vnet_res_t *)vrh;
1110 	vnet_t		*vnetp = vresp->vnetp;
1111 
1112 	if ((vnetp == NULL) || (vnetp->mh == 0)) {
1113 		freemsgchain(mp);
1114 		return;
1115 	}
1116 
1117 	/*
1118 	 * Packets received over a hybrid resource need additional processing
1119 	 * to remove the tag, for the pvid case. The underlying resource is
1120 	 * not aware of the vnet's pvid and thus packets are received with the
1121 	 * vlan tag in the header; unlike packets that are received over a ldc
1122 	 * channel in which case the peer vnet/vsw would have already removed
1123 	 * the tag.
1124 	 */
1125 	if (vresp->type == VIO_NET_RES_HYBRID &&
1126 	    vnetp->pvid != vnetp->default_vlan_id) {
1127 
1128 		vnet_rx_frames_untag(vnetp->pvid, &mp);
1129 		if (mp == NULL) {
1130 			return;
1131 		}
1132 	}
1133 
1134 	mac_rx(vnetp->mh, NULL, mp);
1135 }
1136 
1137 void
1138 vnet_tx_update(vio_net_handle_t vrh)
1139 {
1140 	vnet_res_t *vresp = (vnet_res_t *)vrh;
1141 	vnet_t *vnetp = vresp->vnetp;
1142 
1143 	if ((vnetp != NULL) && (vnetp->mh != NULL)) {
1144 		mac_tx_update(vnetp->mh);
1145 	}
1146 }
1147 
1148 /*
1149  * Update the new mtu of vnet into the mac layer. First check if the device has
1150  * been plumbed and if so fail the mtu update. Returns 0 on success.
1151  */
1152 int
1153 vnet_mtu_update(vnet_t *vnetp, uint32_t mtu)
1154 {
1155 	int	rv;
1156 
1157 	if (vnetp == NULL || vnetp->mh == NULL) {
1158 		return (EINVAL);
1159 	}
1160 
1161 	WRITE_ENTER(&vnetp->vrwlock);
1162 
1163 	if (vnetp->flags & VNET_STARTED) {
1164 		RW_EXIT(&vnetp->vrwlock);
1165 		cmn_err(CE_NOTE, "!vnet%d: Unable to process mtu "
1166 		    "update as the device is plumbed\n",
1167 		    vnetp->instance);
1168 		return (EBUSY);
1169 	}
1170 
1171 	/* update mtu in the mac layer */
1172 	rv = mac_maxsdu_update(vnetp->mh, mtu);
1173 	if (rv != 0) {
1174 		RW_EXIT(&vnetp->vrwlock);
1175 		cmn_err(CE_NOTE,
1176 		    "!vnet%d: Unable to update mtu with mac layer\n",
1177 		    vnetp->instance);
1178 		return (EIO);
1179 	}
1180 
1181 	vnetp->mtu = mtu;
1182 
1183 	RW_EXIT(&vnetp->vrwlock);
1184 
1185 	return (0);
1186 }
1187 
1188 /*
1189  * Update the link state of vnet to the mac layer.
1190  */
1191 void
1192 vnet_link_update(vnet_t *vnetp, link_state_t link_state)
1193 {
1194 	if (vnetp == NULL || vnetp->mh == NULL) {
1195 		return;
1196 	}
1197 
1198 	WRITE_ENTER(&vnetp->vrwlock);
1199 	if (vnetp->link_state == link_state) {
1200 		RW_EXIT(&vnetp->vrwlock);
1201 		return;
1202 	}
1203 	vnetp->link_state = link_state;
1204 	RW_EXIT(&vnetp->vrwlock);
1205 
1206 	mac_link_update(vnetp->mh, link_state);
1207 }
1208 
1209 /*
1210  * vio_net_resource_reg -- An interface called to register a resource
1211  *	with vnet.
1212  *	macp -- a GLDv3 mac_register that has all the details of
1213  *		a resource and its callbacks etc.
1214  *	type -- resource type.
1215  *	local_macaddr -- resource's MAC address. This is used to
1216  *			 associate a resource with a corresponding vnet.
1217  *	remote_macaddr -- remote side MAC address. This is ignored for
1218  *			  the Hybrid resources.
1219  *	vhp -- A handle returned to the caller.
1220  *	vcb -- A set of callbacks provided to the callers.
1221  */
1222 int vio_net_resource_reg(mac_register_t *macp, vio_net_res_type_t type,
1223     ether_addr_t local_macaddr, ether_addr_t rem_macaddr, vio_net_handle_t *vhp,
1224     vio_net_callbacks_t *vcb)
1225 {
1226 	vnet_t	*vnetp;
1227 	vnet_res_t *vresp;
1228 
1229 	vresp = kmem_zalloc(sizeof (vnet_res_t), KM_SLEEP);
1230 	ether_copy(local_macaddr, vresp->local_macaddr);
1231 	ether_copy(rem_macaddr, vresp->rem_macaddr);
1232 	vresp->type = type;
1233 	bcopy(macp, &vresp->macreg, sizeof (mac_register_t));
1234 
1235 	DBG1(NULL, "Resource Registerig type=0%X\n", type);
1236 
1237 	READ_ENTER(&vnet_rw);
1238 	vnetp = vnet_headp;
1239 	while (vnetp != NULL) {
1240 		if (VNET_MATCH_RES(vresp, vnetp)) {
1241 			vresp->vnetp = vnetp;
1242 
1243 			/* Setup kstats for hio resource */
1244 			if (vresp->type == VIO_NET_RES_HYBRID) {
1245 				vresp->ksp = vnet_hio_setup_kstats(DRV_NAME,
1246 				    "hio", vresp);
1247 				if (vresp->ksp == NULL) {
1248 					cmn_err(CE_NOTE, "!vnet%d: Cannot "
1249 					    "create kstats for hio resource",
1250 					    vnetp->instance);
1251 				}
1252 			}
1253 
1254 			WRITE_ENTER(&vnetp->vrwlock);
1255 			vresp->nextp = vnetp->vres_list;
1256 			vnetp->vres_list = vresp;
1257 			RW_EXIT(&vnetp->vrwlock);
1258 			break;
1259 		}
1260 		vnetp = vnetp->nextp;
1261 	}
1262 	RW_EXIT(&vnet_rw);
1263 	if (vresp->vnetp == NULL) {
1264 		DWARN(NULL, "No vnet instance");
1265 		kmem_free(vresp, sizeof (vnet_res_t));
1266 		return (ENXIO);
1267 	}
1268 
1269 	*vhp = vresp;
1270 	vcb->vio_net_rx_cb = vnet_rx;
1271 	vcb->vio_net_tx_update = vnet_tx_update;
1272 	vcb->vio_net_report_err = vnet_handle_res_err;
1273 
1274 	/* Dispatch a task to start resources */
1275 	vnet_dispatch_res_task(vnetp);
1276 	return (0);
1277 }
1278 
1279 /*
1280  * vio_net_resource_unreg -- An interface to unregister a resource.
1281  */
1282 void
1283 vio_net_resource_unreg(vio_net_handle_t vhp)
1284 {
1285 	vnet_res_t *vresp = (vnet_res_t *)vhp;
1286 	vnet_t *vnetp = vresp->vnetp;
1287 	vnet_res_t *vrp;
1288 	kstat_t *ksp = NULL;
1289 
1290 	DBG1(NULL, "Resource Registerig hdl=0x%p", vhp);
1291 
1292 	ASSERT(vnetp != NULL);
1293 	vnet_fdbe_del(vnetp, vresp);
1294 
1295 	WRITE_ENTER(&vnetp->vrwlock);
1296 	if (vresp == vnetp->vres_list) {
1297 		vnetp->vres_list = vresp->nextp;
1298 	} else {
1299 		vrp = vnetp->vres_list;
1300 		while (vrp->nextp != NULL) {
1301 			if (vrp->nextp == vresp) {
1302 				vrp->nextp = vresp->nextp;
1303 				break;
1304 			}
1305 			vrp = vrp->nextp;
1306 		}
1307 	}
1308 
1309 	ksp = vresp->ksp;
1310 	vresp->ksp = NULL;
1311 
1312 	vresp->vnetp = NULL;
1313 	vresp->nextp = NULL;
1314 	RW_EXIT(&vnetp->vrwlock);
1315 	vnet_hio_destroy_kstats(ksp);
1316 	KMEM_FREE(vresp);
1317 }
1318 
1319 /*
1320  * vnet_dds_rx -- an interface called by vgen to DDS messages.
1321  */
1322 void
1323 vnet_dds_rx(void *arg, void *dmsg)
1324 {
1325 	vnet_t *vnetp = arg;
1326 	vdds_process_dds_msg(vnetp, dmsg);
1327 }
1328 
1329 /*
1330  * vnet_send_dds_msg -- An interface provided to DDS to send
1331  *	DDS messages. This simply sends meessages via vgen.
1332  */
1333 int
1334 vnet_send_dds_msg(vnet_t *vnetp, void *dmsg)
1335 {
1336 	int rv;
1337 
1338 	if (vnetp->vgenhdl != NULL) {
1339 		rv = vgen_dds_tx(vnetp->vgenhdl, dmsg);
1340 	}
1341 	return (rv);
1342 }
1343 
1344 /*
1345  * vnet_handle_res_err -- A callback function called by a resource
1346  *	to report an error. For example, vgen can call to report
1347  *	an LDC down/reset event. This will trigger cleanup of associated
1348  *	Hybrid resource.
1349  */
1350 /* ARGSUSED */
1351 static void
1352 vnet_handle_res_err(vio_net_handle_t vrh, vio_net_err_val_t err)
1353 {
1354 	vnet_res_t *vresp = (vnet_res_t *)vrh;
1355 	vnet_t *vnetp = vresp->vnetp;
1356 	int rv;
1357 
1358 	if (vnetp == NULL) {
1359 		return;
1360 	}
1361 	if ((vresp->type != VIO_NET_RES_LDC_SERVICE) &&
1362 	    (vresp->type != VIO_NET_RES_HYBRID)) {
1363 		return;
1364 	}
1365 	rv = ddi_taskq_dispatch(vnetp->taskqp, vdds_cleanup_hybrid_res,
1366 	    vnetp, DDI_NOSLEEP);
1367 	if (rv != DDI_SUCCESS) {
1368 		cmn_err(CE_WARN,
1369 		    "vnet%d:Failed to dispatch task to cleanup hybrid resource",
1370 		    vnetp->instance);
1371 	}
1372 }
1373 
1374 /*
1375  * vnet_dispatch_res_task -- A function to dispatch tasks start resources.
1376  */
1377 static void
1378 vnet_dispatch_res_task(vnet_t *vnetp)
1379 {
1380 	int rv;
1381 
1382 	WRITE_ENTER(&vnetp->vrwlock);
1383 	if (vnetp->flags & VNET_STARTED) {
1384 		rv = ddi_taskq_dispatch(vnetp->taskqp, vnet_res_start_task,
1385 		    vnetp, DDI_NOSLEEP);
1386 		if (rv != DDI_SUCCESS) {
1387 			cmn_err(CE_WARN,
1388 			    "vnet%d:Can't dispatch start resource task",
1389 			    vnetp->instance);
1390 		}
1391 	}
1392 	RW_EXIT(&vnetp->vrwlock);
1393 }
1394 
1395 /*
1396  * vnet_res_start_task -- A taskq callback function that starts a resource.
1397  */
1398 static void
1399 vnet_res_start_task(void *arg)
1400 {
1401 	vnet_t *vnetp = arg;
1402 
1403 	WRITE_ENTER(&vnetp->vrwlock);
1404 	if (vnetp->flags & VNET_STARTED) {
1405 		vnet_start_resources(vnetp);
1406 	}
1407 	RW_EXIT(&vnetp->vrwlock);
1408 }
1409 
1410 /*
1411  * vnet_start_resources -- starts all resources associated with
1412  *	a vnet.
1413  */
1414 static void
1415 vnet_start_resources(vnet_t *vnetp)
1416 {
1417 	mac_register_t	*macp;
1418 	mac_callbacks_t	*cbp;
1419 	vnet_res_t	*vresp;
1420 	int rv;
1421 
1422 	DBG1(vnetp, "enter\n");
1423 
1424 	for (vresp = vnetp->vres_list; vresp != NULL; vresp = vresp->nextp) {
1425 		/* skip if it is already started */
1426 		if (vresp->flags & VNET_STARTED) {
1427 			continue;
1428 		}
1429 		macp = &vresp->macreg;
1430 		cbp = macp->m_callbacks;
1431 		rv = cbp->mc_start(macp->m_driver);
1432 		if (rv == 0) {
1433 			/*
1434 			 * Successfully started the resource, so now
1435 			 * add it to the fdb.
1436 			 */
1437 			vresp->flags |= VNET_STARTED;
1438 			vnet_fdbe_add(vnetp, vresp);
1439 		}
1440 	}
1441 
1442 	DBG1(vnetp, "exit\n");
1443 
1444 }
1445 
1446 /*
1447  * vnet_stop_resources -- stop all resources associated with a vnet.
1448  */
1449 static void
1450 vnet_stop_resources(vnet_t *vnetp)
1451 {
1452 	vnet_res_t	*vresp;
1453 	vnet_res_t	*nvresp;
1454 	mac_register_t	*macp;
1455 	mac_callbacks_t	*cbp;
1456 
1457 	DBG1(vnetp, "enter\n");
1458 
1459 	for (vresp = vnetp->vres_list; vresp != NULL; ) {
1460 		nvresp = vresp->nextp;
1461 		if (vresp->flags & VNET_STARTED) {
1462 			macp = &vresp->macreg;
1463 			cbp = macp->m_callbacks;
1464 			cbp->mc_stop(macp->m_driver);
1465 			vresp->flags &= ~VNET_STARTED;
1466 		}
1467 		vresp = nvresp;
1468 	}
1469 	DBG1(vnetp, "exit\n");
1470 }
1471 
1472 /*
1473  * Setup kstats for the HIO statistics.
1474  * NOTE: the synchronization for the statistics is the
1475  * responsibility of the caller.
1476  */
1477 kstat_t *
1478 vnet_hio_setup_kstats(char *ks_mod, char *ks_name, vnet_res_t *vresp)
1479 {
1480 	kstat_t *ksp;
1481 	vnet_t *vnetp = vresp->vnetp;
1482 	vnet_hio_kstats_t *hiokp;
1483 	size_t size;
1484 
1485 	ASSERT(vnetp != NULL);
1486 	size = sizeof (vnet_hio_kstats_t) / sizeof (kstat_named_t);
1487 	ksp = kstat_create(ks_mod, vnetp->instance, ks_name, "net",
1488 	    KSTAT_TYPE_NAMED, size, 0);
1489 	if (ksp == NULL) {
1490 		return (NULL);
1491 	}
1492 
1493 	hiokp = (vnet_hio_kstats_t *)ksp->ks_data;
1494 	kstat_named_init(&hiokp->ipackets,		"ipackets",
1495 	    KSTAT_DATA_ULONG);
1496 	kstat_named_init(&hiokp->ierrors,		"ierrors",
1497 	    KSTAT_DATA_ULONG);
1498 	kstat_named_init(&hiokp->opackets,		"opackets",
1499 	    KSTAT_DATA_ULONG);
1500 	kstat_named_init(&hiokp->oerrors,		"oerrors",
1501 	    KSTAT_DATA_ULONG);
1502 
1503 
1504 	/* MIB II kstat variables */
1505 	kstat_named_init(&hiokp->rbytes,		"rbytes",
1506 	    KSTAT_DATA_ULONG);
1507 	kstat_named_init(&hiokp->obytes,		"obytes",
1508 	    KSTAT_DATA_ULONG);
1509 	kstat_named_init(&hiokp->multircv,		"multircv",
1510 	    KSTAT_DATA_ULONG);
1511 	kstat_named_init(&hiokp->multixmt,		"multixmt",
1512 	    KSTAT_DATA_ULONG);
1513 	kstat_named_init(&hiokp->brdcstrcv,		"brdcstrcv",
1514 	    KSTAT_DATA_ULONG);
1515 	kstat_named_init(&hiokp->brdcstxmt,		"brdcstxmt",
1516 	    KSTAT_DATA_ULONG);
1517 	kstat_named_init(&hiokp->norcvbuf,		"norcvbuf",
1518 	    KSTAT_DATA_ULONG);
1519 	kstat_named_init(&hiokp->noxmtbuf,		"noxmtbuf",
1520 	    KSTAT_DATA_ULONG);
1521 
1522 	ksp->ks_update = vnet_hio_update_kstats;
1523 	ksp->ks_private = (void *)vresp;
1524 	kstat_install(ksp);
1525 	return (ksp);
1526 }
1527 
1528 /*
1529  * Destroy kstats.
1530  */
1531 static void
1532 vnet_hio_destroy_kstats(kstat_t *ksp)
1533 {
1534 	if (ksp != NULL)
1535 		kstat_delete(ksp);
1536 }
1537 
1538 /*
1539  * Update the kstats.
1540  */
1541 static int
1542 vnet_hio_update_kstats(kstat_t *ksp, int rw)
1543 {
1544 	vnet_t *vnetp;
1545 	vnet_res_t *vresp;
1546 	vnet_hio_stats_t statsp;
1547 	vnet_hio_kstats_t *hiokp;
1548 
1549 	vresp = (vnet_res_t *)ksp->ks_private;
1550 	vnetp = vresp->vnetp;
1551 
1552 	bzero(&statsp, sizeof (vnet_hio_stats_t));
1553 
1554 	READ_ENTER(&vnetp->vsw_fp_rw);
1555 	if (vnetp->hio_fp == NULL) {
1556 		/* not using hio resources, just return */
1557 		RW_EXIT(&vnetp->vsw_fp_rw);
1558 		return (0);
1559 	}
1560 	VNET_FDBE_REFHOLD(vnetp->hio_fp);
1561 	RW_EXIT(&vnetp->vsw_fp_rw);
1562 	vnet_hio_get_stats(vnetp->hio_fp, &statsp);
1563 	VNET_FDBE_REFRELE(vnetp->hio_fp);
1564 
1565 	hiokp = (vnet_hio_kstats_t *)ksp->ks_data;
1566 
1567 	if (rw == KSTAT_READ) {
1568 		/* Link Input/Output stats */
1569 		hiokp->ipackets.value.ul	= (uint32_t)statsp.ipackets;
1570 		hiokp->ipackets64.value.ull	= statsp.ipackets;
1571 		hiokp->ierrors.value.ul		= statsp.ierrors;
1572 		hiokp->opackets.value.ul	= (uint32_t)statsp.opackets;
1573 		hiokp->opackets64.value.ull	= statsp.opackets;
1574 		hiokp->oerrors.value.ul		= statsp.oerrors;
1575 
1576 		/* MIB II kstat variables */
1577 		hiokp->rbytes.value.ul		= (uint32_t)statsp.rbytes;
1578 		hiokp->rbytes64.value.ull	= statsp.rbytes;
1579 		hiokp->obytes.value.ul		= (uint32_t)statsp.obytes;
1580 		hiokp->obytes64.value.ull	= statsp.obytes;
1581 		hiokp->multircv.value.ul	= statsp.multircv;
1582 		hiokp->multixmt.value.ul	= statsp.multixmt;
1583 		hiokp->brdcstrcv.value.ul	= statsp.brdcstrcv;
1584 		hiokp->brdcstxmt.value.ul	= statsp.brdcstxmt;
1585 		hiokp->norcvbuf.value.ul	= statsp.norcvbuf;
1586 		hiokp->noxmtbuf.value.ul	= statsp.noxmtbuf;
1587 	} else {
1588 		return (EACCES);
1589 	}
1590 
1591 	return (0);
1592 }
1593 
1594 static void
1595 vnet_hio_get_stats(vnet_res_t *vresp, vnet_hio_stats_t *statsp)
1596 {
1597 	mac_register_t		*macp;
1598 	mac_callbacks_t		*cbp;
1599 	uint64_t		val;
1600 	int			stat;
1601 
1602 	/*
1603 	 * get the specified statistics from the underlying nxge.
1604 	 */
1605 	macp = &vresp->macreg;
1606 	cbp = macp->m_callbacks;
1607 	for (stat = MAC_STAT_MIN; stat < MAC_STAT_OVERFLOWS; stat++) {
1608 		if (cbp->mc_getstat(macp->m_driver, stat, &val) == 0) {
1609 			switch (stat) {
1610 			case MAC_STAT_IPACKETS:
1611 				statsp->ipackets = val;
1612 				break;
1613 
1614 			case MAC_STAT_IERRORS:
1615 				statsp->ierrors = val;
1616 				break;
1617 
1618 			case MAC_STAT_OPACKETS:
1619 				statsp->opackets = val;
1620 				break;
1621 
1622 			case MAC_STAT_OERRORS:
1623 				statsp->oerrors = val;
1624 				break;
1625 
1626 			case MAC_STAT_RBYTES:
1627 				statsp->rbytes = val;
1628 				break;
1629 
1630 			case MAC_STAT_OBYTES:
1631 				statsp->obytes = val;
1632 				break;
1633 
1634 			case MAC_STAT_MULTIRCV:
1635 				statsp->multircv = val;
1636 				break;
1637 
1638 			case MAC_STAT_MULTIXMT:
1639 				statsp->multixmt = val;
1640 				break;
1641 
1642 			case MAC_STAT_BRDCSTRCV:
1643 				statsp->brdcstrcv = val;
1644 				break;
1645 
1646 			case MAC_STAT_BRDCSTXMT:
1647 				statsp->brdcstxmt = val;
1648 				break;
1649 
1650 			case MAC_STAT_NOXMTBUF:
1651 				statsp->noxmtbuf = val;
1652 				break;
1653 
1654 			case MAC_STAT_NORCVBUF:
1655 				statsp->norcvbuf = val;
1656 				break;
1657 
1658 			default:
1659 				/*
1660 				 * parameters not interested.
1661 				 */
1662 				break;
1663 			}
1664 		}
1665 	}
1666 }
1667 
1668 #ifdef	VNET_IOC_DEBUG
1669 
1670 /*
1671  * The ioctl entry point is used only for debugging for now. The ioctl commands
1672  * can be used to force the link state of the channel connected to vsw.
1673  */
1674 static void
1675 vnet_m_ioctl(void *arg, queue_t *q, mblk_t *mp)
1676 {
1677 	struct iocblk	*iocp;
1678 	vnet_t		*vnetp;
1679 
1680 	iocp = (struct iocblk *)(uintptr_t)mp->b_rptr;
1681 	iocp->ioc_error = 0;
1682 	vnetp = (vnet_t *)arg;
1683 
1684 	if (vnetp == NULL) {
1685 		miocnak(q, mp, 0, EINVAL);
1686 		return;
1687 	}
1688 
1689 	switch (iocp->ioc_cmd) {
1690 
1691 	case VNET_FORCE_LINK_DOWN:
1692 	case VNET_FORCE_LINK_UP:
1693 		vnet_force_link_state(vnetp, q, mp);
1694 		break;
1695 
1696 	default:
1697 		iocp->ioc_error = EINVAL;
1698 		miocnak(q, mp, 0, iocp->ioc_error);
1699 		break;
1700 
1701 	}
1702 }
1703 
1704 static void
1705 vnet_force_link_state(vnet_t *vnetp, queue_t *q, mblk_t *mp)
1706 {
1707 	mac_register_t	*macp;
1708 	mac_callbacks_t	*cbp;
1709 	vnet_res_t	*vresp;
1710 
1711 	READ_ENTER(&vnetp->vsw_fp_rw);
1712 
1713 	vresp = vnetp->vsw_fp;
1714 	if (vresp == NULL) {
1715 		RW_EXIT(&vnetp->vsw_fp_rw);
1716 		return;
1717 	}
1718 
1719 	macp = &vresp->macreg;
1720 	cbp = macp->m_callbacks;
1721 	cbp->mc_ioctl(macp->m_driver, q, mp);
1722 
1723 	RW_EXIT(&vnetp->vsw_fp_rw);
1724 }
1725 
1726 #else
1727 
1728 static void
1729 vnet_m_ioctl(void *arg, queue_t *q, mblk_t *mp)
1730 {
1731 	vnet_t		*vnetp;
1732 
1733 	vnetp = (vnet_t *)arg;
1734 
1735 	if (vnetp == NULL) {
1736 		miocnak(q, mp, 0, EINVAL);
1737 		return;
1738 	}
1739 
1740 	/* ioctl support only for debugging */
1741 	miocnak(q, mp, 0, ENOTSUP);
1742 }
1743 
1744 #endif
1745