xref: /titanic_41/usr/src/uts/sun4v/io/vnet.c (revision 0bb073995ac5a95bd35f2dd790df1ea3d8c2d507)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #include <sys/types.h>
28 #include <sys/errno.h>
29 #include <sys/param.h>
30 #include <sys/stream.h>
31 #include <sys/kmem.h>
32 #include <sys/conf.h>
33 #include <sys/devops.h>
34 #include <sys/ksynch.h>
35 #include <sys/stat.h>
36 #include <sys/modctl.h>
37 #include <sys/modhash.h>
38 #include <sys/debug.h>
39 #include <sys/ethernet.h>
40 #include <sys/dlpi.h>
41 #include <net/if.h>
42 #include <sys/mac.h>
43 #include <sys/mac_ether.h>
44 #include <sys/ddi.h>
45 #include <sys/sunddi.h>
46 #include <sys/strsun.h>
47 #include <sys/note.h>
48 #include <sys/atomic.h>
49 #include <sys/vnet.h>
50 #include <sys/vlan.h>
51 #include <sys/vnet_mailbox.h>
52 #include <sys/vnet_common.h>
53 #include <sys/dds.h>
54 #include <sys/strsubr.h>
55 #include <sys/taskq.h>
56 
57 /*
58  * Function prototypes.
59  */
60 
61 /* DDI entrypoints */
62 static int vnetdevinfo(dev_info_t *, ddi_info_cmd_t, void *, void **);
63 static int vnetattach(dev_info_t *, ddi_attach_cmd_t);
64 static int vnetdetach(dev_info_t *, ddi_detach_cmd_t);
65 
66 /* MAC entrypoints  */
67 static int vnet_m_stat(void *, uint_t, uint64_t *);
68 static int vnet_m_start(void *);
69 static void vnet_m_stop(void *);
70 static int vnet_m_promisc(void *, boolean_t);
71 static int vnet_m_multicst(void *, boolean_t, const uint8_t *);
72 static int vnet_m_unicst(void *, const uint8_t *);
73 mblk_t *vnet_m_tx(void *, mblk_t *);
74 
75 /* vnet internal functions */
76 static int vnet_mac_register(vnet_t *);
77 static int vnet_read_mac_address(vnet_t *vnetp);
78 
79 /* Forwarding database (FDB) routines */
80 static void vnet_fdb_create(vnet_t *vnetp);
81 static void vnet_fdb_destroy(vnet_t *vnetp);
82 static vnet_res_t *vnet_fdbe_find(vnet_t *vnetp, struct ether_addr *addrp);
83 static void vnet_fdbe_find_cb(mod_hash_key_t key, mod_hash_val_t val);
84 void vnet_fdbe_add(vnet_t *vnetp, vnet_res_t *vresp);
85 static void vnet_fdbe_del(vnet_t *vnetp, vnet_res_t *vresp);
86 
87 static void vnet_rx(vio_net_handle_t vrh, mblk_t *mp);
88 static void vnet_tx_update(vio_net_handle_t vrh);
89 static void vnet_res_start_task(void *arg);
90 static void vnet_start_resources(vnet_t *vnetp);
91 static void vnet_stop_resources(vnet_t *vnetp);
92 static void vnet_dispatch_res_task(vnet_t *vnetp);
93 static void vnet_res_start_task(void *arg);
94 static void vnet_handle_res_err(vio_net_handle_t vrh, vio_net_err_val_t err);
95 int vnet_mtu_update(vnet_t *vnetp, uint32_t mtu);
96 
97 /* Exported to to vnet_dds */
98 int vnet_send_dds_msg(vnet_t *vnetp, void *dmsg);
99 
100 /* Externs that are imported from vnet_gen */
101 extern int vgen_init(void *vnetp, uint64_t regprop, dev_info_t *vnetdip,
102     const uint8_t *macaddr, void **vgenhdl);
103 extern int vgen_uninit(void *arg);
104 extern int vgen_dds_tx(void *arg, void *dmsg);
105 
106 /* Externs that are imported from vnet_dds */
107 extern void vdds_mod_init(void);
108 extern void vdds_mod_fini(void);
109 extern int vdds_init(vnet_t *vnetp);
110 extern void vdds_cleanup(vnet_t *vnetp);
111 extern void vdds_process_dds_msg(vnet_t *vnetp, vio_dds_msg_t *dmsg);
112 extern void vdds_cleanup_hybrid_res(vnet_t *vnetp);
113 
/*
 * Take a reference on an fdb entry: atomically bump its refcnt and assert
 * that the counter did not wrap to zero.
 *
 * Wrapped in do { } while (0) so that the macro followed by ';' is a single
 * statement and can be used safely as the body of an unbraced if/else.
 */
#define	VNET_FDBE_REFHOLD(p)						\
do {									\
	atomic_inc_32(&(p)->refcnt);					\
	ASSERT((p)->refcnt != 0);					\
} while (0)
119 
/*
 * Drop a reference on an fdb entry: assert that a reference is actually
 * held, then atomically decrement refcnt.
 *
 * Wrapped in do { } while (0) so that the macro followed by ';' is a single
 * statement and can be used safely as the body of an unbraced if/else.
 */
#define	VNET_FDBE_REFRELE(p)						\
do {									\
	ASSERT((p)->refcnt != 0);					\
	atomic_dec_32(&(p)->refcnt);					\
} while (0)
125 
126 static mac_callbacks_t vnet_m_callbacks = {
127 	0,
128 	vnet_m_stat,
129 	vnet_m_start,
130 	vnet_m_stop,
131 	vnet_m_promisc,
132 	vnet_m_multicst,
133 	vnet_m_unicst,
134 	vnet_m_tx,
135 	NULL,
136 	NULL,
137 	NULL
138 };
139 
140 /*
141  * Linked list of "vnet_t" structures - one per instance.
142  */
143 static vnet_t	*vnet_headp = NULL;
144 static krwlock_t vnet_rw;
145 
146 /* Tunables */
147 uint32_t vnet_ntxds = VNET_NTXDS;	/* power of 2 transmit descriptors */
148 uint32_t vnet_ldcwd_interval = VNET_LDCWD_INTERVAL; /* watchdog freq in msec */
149 uint32_t vnet_ldcwd_txtimeout = VNET_LDCWD_TXTIMEOUT;  /* tx timeout in msec */
150 uint32_t vnet_ldc_mtu = VNET_LDC_MTU;		/* ldc mtu */
151 
152 /*
153  * Set this to non-zero to enable additional internal receive buffer pools
154  * based on the MTU of the device for better performance at the cost of more
155  * memory consumption. This is turned off by default, to use allocb(9F) for
156  * receive buffer allocations of sizes > 2K.
157  */
158 boolean_t vnet_jumbo_rxpools = B_FALSE;
159 
160 /* # of chains in fdb hash table */
161 uint32_t	vnet_fdb_nchains = VNET_NFDB_HASH;
162 
163 /* Internal tunables */
164 uint32_t	vnet_ethermtu = 1500;	/* mtu of the device */
165 
166 /*
167  * Default vlan id. This is only used internally when the "default-vlan-id"
168  * property is not present in the MD device node. Therefore, this should not be
169  * used as a tunable; if this value is changed, the corresponding variable
170  * should be updated to the same value in vsw and also other vnets connected to
171  * the same vsw.
172  */
173 uint16_t	vnet_default_vlan_id = 1;
174 
175 /* delay in usec to wait for all references on a fdb entry to be dropped */
176 uint32_t vnet_fdbe_refcnt_delay = 10;
177 
178 static struct ether_addr etherbroadcastaddr = {
179 	0xff, 0xff, 0xff, 0xff, 0xff, 0xff
180 };
181 
182 
/*
 * Name of the device property that carries the MAC address; looked up by
 * vnet_read_mac_address().
 */
static char macaddr_propname[] = "local-mac-address";

/*
 * Module identification string, as displayed by modinfo(1m).
 */
static char vnet_ident[] = "vnet driver";
extern struct mod_ops mod_driverops;
193 static struct cb_ops cb_vnetops = {
194 	nulldev,		/* cb_open */
195 	nulldev,		/* cb_close */
196 	nodev,			/* cb_strategy */
197 	nodev,			/* cb_print */
198 	nodev,			/* cb_dump */
199 	nodev,			/* cb_read */
200 	nodev,			/* cb_write */
201 	nodev,			/* cb_ioctl */
202 	nodev,			/* cb_devmap */
203 	nodev,			/* cb_mmap */
204 	nodev,			/* cb_segmap */
205 	nochpoll,		/* cb_chpoll */
206 	ddi_prop_op,		/* cb_prop_op */
207 	NULL,			/* cb_stream */
208 	(int)(D_MP)		/* cb_flag */
209 };
210 
211 static struct dev_ops vnetops = {
212 	DEVO_REV,		/* devo_rev */
213 	0,			/* devo_refcnt */
214 	NULL,			/* devo_getinfo */
215 	nulldev,		/* devo_identify */
216 	nulldev,		/* devo_probe */
217 	vnetattach,		/* devo_attach */
218 	vnetdetach,		/* devo_detach */
219 	nodev,			/* devo_reset */
220 	&cb_vnetops,		/* devo_cb_ops */
221 	(struct bus_ops *)NULL,	/* devo_bus_ops */
222 	NULL,			/* devo_power */
223 	ddi_quiesce_not_supported,	/* devo_quiesce */
224 };
225 
226 static struct modldrv modldrv = {
227 	&mod_driverops,		/* Type of module.  This one is a driver */
228 	vnet_ident,		/* ID string */
229 	&vnetops		/* driver specific ops */
230 };
231 
232 static struct modlinkage modlinkage = {
233 	MODREV_1, (void *)&modldrv, NULL
234 };
235 
236 #ifdef DEBUG
237 
238 /*
239  * Print debug messages - set to 0xf to enable all msgs
240  */
241 int vnet_dbglevel = 0x8;
242 
243 static void
244 debug_printf(const char *fname, void *arg, const char *fmt, ...)
245 {
246 	char    buf[512];
247 	va_list ap;
248 	vnet_t *vnetp = (vnet_t *)arg;
249 	char    *bufp = buf;
250 
251 	if (vnetp == NULL) {
252 		(void) sprintf(bufp, "%s: ", fname);
253 		bufp += strlen(bufp);
254 	} else {
255 		(void) sprintf(bufp, "vnet%d:%s: ", vnetp->instance, fname);
256 		bufp += strlen(bufp);
257 	}
258 	va_start(ap, fmt);
259 	(void) vsprintf(bufp, fmt, ap);
260 	va_end(ap);
261 	cmn_err(CE_CONT, "%s\n", buf);
262 }
263 
264 #endif
265 
266 /* _init(9E): initialize the loadable module */
267 int
268 _init(void)
269 {
270 	int status;
271 
272 	DBG1(NULL, "enter\n");
273 
274 	mac_init_ops(&vnetops, "vnet");
275 	status = mod_install(&modlinkage);
276 	if (status != 0) {
277 		mac_fini_ops(&vnetops);
278 	}
279 	vdds_mod_init();
280 	DBG1(NULL, "exit(%d)\n", status);
281 	return (status);
282 }
283 
284 /* _fini(9E): prepare the module for unloading. */
285 int
286 _fini(void)
287 {
288 	int status;
289 
290 	DBG1(NULL, "enter\n");
291 
292 	status = mod_remove(&modlinkage);
293 	if (status != 0)
294 		return (status);
295 	mac_fini_ops(&vnetops);
296 	vdds_mod_fini();
297 
298 	DBG1(NULL, "exit(%d)\n", status);
299 	return (status);
300 }
301 
302 /* _info(9E): return information about the loadable module */
303 int
304 _info(struct modinfo *modinfop)
305 {
306 	return (mod_info(&modlinkage, modinfop));
307 }
308 
309 /*
310  * attach(9E): attach a device to the system.
311  * called once for each instance of the device on the system.
312  */
313 static int
314 vnetattach(dev_info_t *dip, ddi_attach_cmd_t cmd)
315 {
316 	vnet_t		*vnetp;
317 	int		status;
318 	int		instance;
319 	uint64_t	reg;
320 	char		qname[TASKQ_NAMELEN];
321 	enum	{ AST_init = 0x0, AST_vnet_alloc = 0x1,
322 		AST_mac_alloc = 0x2, AST_read_macaddr = 0x4,
323 		AST_vgen_init = 0x8, AST_fdbh_alloc = 0x10,
324 		AST_vdds_init = 0x20, AST_taskq_create = 0x40,
325 		AST_vnet_list = 0x80 } attach_state;
326 
327 	attach_state = AST_init;
328 
329 	switch (cmd) {
330 	case DDI_ATTACH:
331 		break;
332 	case DDI_RESUME:
333 	case DDI_PM_RESUME:
334 	default:
335 		goto vnet_attach_fail;
336 	}
337 
338 	instance = ddi_get_instance(dip);
339 	DBG1(NULL, "instance(%d) enter\n", instance);
340 
341 	/* allocate vnet_t and mac_t structures */
342 	vnetp = kmem_zalloc(sizeof (vnet_t), KM_SLEEP);
343 	vnetp->dip = dip;
344 	vnetp->instance = instance;
345 	rw_init(&vnetp->vrwlock, NULL, RW_DRIVER, NULL);
346 	rw_init(&vnetp->vsw_fp_rw, NULL, RW_DRIVER, NULL);
347 	attach_state |= AST_vnet_alloc;
348 
349 	status = vdds_init(vnetp);
350 	if (status != 0) {
351 		goto vnet_attach_fail;
352 	}
353 	attach_state |= AST_vdds_init;
354 
355 	/* setup links to vnet_t from both devinfo and mac_t */
356 	ddi_set_driver_private(dip, (caddr_t)vnetp);
357 
358 	/* read the mac address */
359 	status = vnet_read_mac_address(vnetp);
360 	if (status != DDI_SUCCESS) {
361 		goto vnet_attach_fail;
362 	}
363 	attach_state |= AST_read_macaddr;
364 
365 	reg = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
366 	    DDI_PROP_DONTPASS, "reg", -1);
367 	if (reg == -1) {
368 		goto vnet_attach_fail;
369 	}
370 	vnetp->reg = reg;
371 
372 	vnet_fdb_create(vnetp);
373 	attach_state |= AST_fdbh_alloc;
374 
375 	(void) snprintf(qname, TASKQ_NAMELEN, "vnet_taskq%d", instance);
376 	if ((vnetp->taskqp = ddi_taskq_create(dip, qname, 1,
377 	    TASKQ_DEFAULTPRI, 0)) == NULL) {
378 		cmn_err(CE_WARN, "!vnet%d: Unable to create task queue",
379 		    instance);
380 		goto vnet_attach_fail;
381 	}
382 	attach_state |= AST_taskq_create;
383 
384 	/* add to the list of vnet devices */
385 	WRITE_ENTER(&vnet_rw);
386 	vnetp->nextp = vnet_headp;
387 	vnet_headp = vnetp;
388 	RW_EXIT(&vnet_rw);
389 
390 	attach_state |= AST_vnet_list;
391 
392 	/*
393 	 * Initialize the generic vnet plugin which provides
394 	 * communication via sun4v LDC (logical domain channel) based
395 	 * resources. It will register the LDC resources as and when
396 	 * they become available.
397 	 */
398 	status = vgen_init(vnetp, reg, vnetp->dip,
399 	    (uint8_t *)vnetp->curr_macaddr, &vnetp->vgenhdl);
400 	if (status != DDI_SUCCESS) {
401 		DERR(vnetp, "vgen_init() failed\n");
402 		goto vnet_attach_fail;
403 	}
404 	attach_state |= AST_vgen_init;
405 
406 	/* register with MAC layer */
407 	status = vnet_mac_register(vnetp);
408 	if (status != DDI_SUCCESS) {
409 		goto vnet_attach_fail;
410 	}
411 
412 	DBG1(NULL, "instance(%d) exit\n", instance);
413 	return (DDI_SUCCESS);
414 
415 vnet_attach_fail:
416 
417 	if (attach_state & AST_vnet_list) {
418 		vnet_t		**vnetpp;
419 		/* unlink from instance(vnet_t) list */
420 		WRITE_ENTER(&vnet_rw);
421 		for (vnetpp = &vnet_headp; *vnetpp;
422 		    vnetpp = &(*vnetpp)->nextp) {
423 			if (*vnetpp == vnetp) {
424 				*vnetpp = vnetp->nextp;
425 				break;
426 			}
427 		}
428 		RW_EXIT(&vnet_rw);
429 	}
430 
431 	if (attach_state & AST_vdds_init) {
432 		vdds_cleanup(vnetp);
433 	}
434 	if (attach_state & AST_taskq_create) {
435 		ddi_taskq_destroy(vnetp->taskqp);
436 	}
437 	if (attach_state & AST_fdbh_alloc) {
438 		vnet_fdb_destroy(vnetp);
439 	}
440 	if (attach_state & AST_vgen_init) {
441 		(void) vgen_uninit(vnetp->vgenhdl);
442 	}
443 	if (attach_state & AST_vnet_alloc) {
444 		rw_destroy(&vnetp->vrwlock);
445 		rw_destroy(&vnetp->vsw_fp_rw);
446 		KMEM_FREE(vnetp);
447 	}
448 	return (DDI_FAILURE);
449 }
450 
451 /*
452  * detach(9E): detach a device from the system.
453  */
454 static int
455 vnetdetach(dev_info_t *dip, ddi_detach_cmd_t cmd)
456 {
457 	vnet_t		*vnetp;
458 	vnet_t		**vnetpp;
459 	int		instance;
460 	int		rv;
461 
462 	instance = ddi_get_instance(dip);
463 	DBG1(NULL, "instance(%d) enter\n", instance);
464 
465 	vnetp = ddi_get_driver_private(dip);
466 	if (vnetp == NULL) {
467 		goto vnet_detach_fail;
468 	}
469 
470 	switch (cmd) {
471 	case DDI_DETACH:
472 		break;
473 	case DDI_SUSPEND:
474 	case DDI_PM_SUSPEND:
475 	default:
476 		goto vnet_detach_fail;
477 	}
478 
479 	(void) vdds_cleanup(vnetp);
480 	rv = vgen_uninit(vnetp->vgenhdl);
481 	if (rv != DDI_SUCCESS) {
482 		goto vnet_detach_fail;
483 	}
484 
485 	/*
486 	 * Unregister from the MAC subsystem.  This can fail, in
487 	 * particular if there are DLPI style-2 streams still open -
488 	 * in which case we just return failure.
489 	 */
490 	if (mac_unregister(vnetp->mh) != 0)
491 		goto vnet_detach_fail;
492 
493 	/* unlink from instance(vnet_t) list */
494 	WRITE_ENTER(&vnet_rw);
495 	for (vnetpp = &vnet_headp; *vnetpp; vnetpp = &(*vnetpp)->nextp) {
496 		if (*vnetpp == vnetp) {
497 			*vnetpp = vnetp->nextp;
498 			break;
499 		}
500 	}
501 	RW_EXIT(&vnet_rw);
502 
503 	ddi_taskq_destroy(vnetp->taskqp);
504 	/* destroy fdb */
505 	vnet_fdb_destroy(vnetp);
506 
507 	rw_destroy(&vnetp->vrwlock);
508 	rw_destroy(&vnetp->vsw_fp_rw);
509 	KMEM_FREE(vnetp);
510 
511 	return (DDI_SUCCESS);
512 
513 vnet_detach_fail:
514 	return (DDI_FAILURE);
515 }
516 
517 /* enable the device for transmit/receive */
518 static int
519 vnet_m_start(void *arg)
520 {
521 	vnet_t		*vnetp = arg;
522 
523 	DBG1(vnetp, "enter\n");
524 
525 	WRITE_ENTER(&vnetp->vrwlock);
526 	vnetp->flags |= VNET_STARTED;
527 	vnet_start_resources(vnetp);
528 	RW_EXIT(&vnetp->vrwlock);
529 
530 	DBG1(vnetp, "exit\n");
531 	return (VNET_SUCCESS);
532 
533 }
534 
535 /* stop transmit/receive for the device */
536 static void
537 vnet_m_stop(void *arg)
538 {
539 	vnet_t		*vnetp = arg;
540 
541 	DBG1(vnetp, "enter\n");
542 
543 	WRITE_ENTER(&vnetp->vrwlock);
544 	if (vnetp->flags & VNET_STARTED) {
545 		vnet_stop_resources(vnetp);
546 		vnetp->flags &= ~VNET_STARTED;
547 	}
548 	RW_EXIT(&vnetp->vrwlock);
549 
550 	DBG1(vnetp, "exit\n");
551 }
552 
553 /* set the unicast mac address of the device */
554 static int
555 vnet_m_unicst(void *arg, const uint8_t *macaddr)
556 {
557 	_NOTE(ARGUNUSED(macaddr))
558 
559 	vnet_t *vnetp = arg;
560 
561 	DBG1(vnetp, "enter\n");
562 	/*
563 	 * NOTE: setting mac address dynamically is not supported.
564 	 */
565 	DBG1(vnetp, "exit\n");
566 
567 	return (VNET_FAILURE);
568 }
569 
570 /* enable/disable a multicast address */
571 static int
572 vnet_m_multicst(void *arg, boolean_t add, const uint8_t *mca)
573 {
574 	_NOTE(ARGUNUSED(add, mca))
575 
576 	vnet_t *vnetp = arg;
577 	vnet_res_t	*vresp;
578 	mac_register_t	*macp;
579 	mac_callbacks_t	*cbp;
580 	int rv = VNET_SUCCESS;
581 
582 	DBG1(vnetp, "enter\n");
583 
584 	READ_ENTER(&vnetp->vrwlock);
585 	for (vresp = vnetp->vres_list; vresp != NULL; vresp = vresp->nextp) {
586 		if (vresp->type == VIO_NET_RES_LDC_SERVICE) {
587 			macp = &vresp->macreg;
588 			cbp = macp->m_callbacks;
589 			rv = cbp->mc_multicst(macp->m_driver, add, mca);
590 		}
591 	}
592 	RW_EXIT(&vnetp->vrwlock);
593 
594 	DBG1(vnetp, "exit(%d)\n", rv);
595 	return (rv);
596 }
597 
598 /* set or clear promiscuous mode on the device */
599 static int
600 vnet_m_promisc(void *arg, boolean_t on)
601 {
602 	_NOTE(ARGUNUSED(on))
603 
604 	vnet_t *vnetp = arg;
605 	DBG1(vnetp, "enter\n");
606 	/*
607 	 * NOTE: setting promiscuous mode is not supported, just return success.
608 	 */
609 	DBG1(vnetp, "exit\n");
610 	return (VNET_SUCCESS);
611 }
612 
613 /*
614  * Transmit a chain of packets. This function provides switching functionality
615  * based on the destination mac address to reach other guests (within ldoms) or
616  * external hosts.
617  */
618 mblk_t *
619 vnet_m_tx(void *arg, mblk_t *mp)
620 {
621 	vnet_t			*vnetp;
622 	vnet_res_t		*vresp;
623 	mblk_t			*next;
624 	mblk_t			*resid_mp;
625 	mac_register_t		*macp;
626 	struct ether_header	*ehp;
627 	boolean_t		is_unicast;
628 
629 	vnetp = (vnet_t *)arg;
630 	DBG1(vnetp, "enter\n");
631 	ASSERT(mp != NULL);
632 
633 	while (mp != NULL) {
634 
635 		next = mp->b_next;
636 		mp->b_next = NULL;
637 
638 		/*
639 		 * Find fdb entry for the destination
640 		 * and hold a reference to it.
641 		 */
642 		ehp = (struct ether_header *)mp->b_rptr;
643 		vresp = vnet_fdbe_find(vnetp, &ehp->ether_dhost);
644 		if (vresp != NULL) {
645 
646 			/*
647 			 * Destination found in FDB.
648 			 * The destination is a vnet device within ldoms
649 			 * and directly reachable, invoke the tx function
650 			 * in the fdb entry.
651 			 */
652 			macp = &vresp->macreg;
653 			resid_mp = macp->m_callbacks->mc_tx(macp->m_driver, mp);
654 
655 			/* tx done; now release ref on fdb entry */
656 			VNET_FDBE_REFRELE(vresp);
657 
658 			if (resid_mp != NULL) {
659 				/* m_tx failed */
660 				mp->b_next = next;
661 				break;
662 			}
663 		} else {
664 			is_unicast = !(IS_BROADCAST(ehp) ||
665 			    (IS_MULTICAST(ehp)));
666 			/*
667 			 * Destination is not in FDB.
668 			 * If the destination is broadcast or multicast,
669 			 * then forward the packet to vswitch.
670 			 * If a Hybrid resource avilable, then send the
671 			 * unicast packet via hybrid resource, otherwise
672 			 * forward it to vswitch.
673 			 */
674 			READ_ENTER(&vnetp->vsw_fp_rw);
675 
676 			if ((is_unicast) && (vnetp->hio_fp != NULL)) {
677 				vresp = vnetp->hio_fp;
678 			} else {
679 				vresp = vnetp->vsw_fp;
680 			}
681 			if (vresp == NULL) {
682 				/*
683 				 * no fdb entry to vsw? drop the packet.
684 				 */
685 				RW_EXIT(&vnetp->vsw_fp_rw);
686 				freemsg(mp);
687 				mp = next;
688 				continue;
689 			}
690 
691 			/* ref hold the fdb entry to vsw */
692 			VNET_FDBE_REFHOLD(vresp);
693 
694 			RW_EXIT(&vnetp->vsw_fp_rw);
695 
696 			macp = &vresp->macreg;
697 			resid_mp = macp->m_callbacks->mc_tx(macp->m_driver, mp);
698 
699 			/* tx done; now release ref on fdb entry */
700 			VNET_FDBE_REFRELE(vresp);
701 
702 			if (resid_mp != NULL) {
703 				/* m_tx failed */
704 				mp->b_next = next;
705 				break;
706 			}
707 		}
708 
709 		mp = next;
710 	}
711 
712 	DBG1(vnetp, "exit\n");
713 	return (mp);
714 }
715 
716 /* get statistics from the device */
717 int
718 vnet_m_stat(void *arg, uint_t stat, uint64_t *val)
719 {
720 	vnet_t *vnetp = arg;
721 	vnet_res_t	*vresp;
722 	mac_register_t	*macp;
723 	mac_callbacks_t	*cbp;
724 	uint64_t val_total = 0;
725 
726 	DBG1(vnetp, "enter\n");
727 
728 	/*
729 	 * get the specified statistic from each transport and return the
730 	 * aggregate val.  This obviously only works for counters.
731 	 */
732 	if ((IS_MAC_STAT(stat) && !MAC_STAT_ISACOUNTER(stat)) ||
733 	    (IS_MACTYPE_STAT(stat) && !ETHER_STAT_ISACOUNTER(stat))) {
734 		return (ENOTSUP);
735 	}
736 
737 	READ_ENTER(&vnetp->vrwlock);
738 	for (vresp = vnetp->vres_list; vresp != NULL; vresp = vresp->nextp) {
739 		macp = &vresp->macreg;
740 		cbp = macp->m_callbacks;
741 		if (cbp->mc_getstat(macp->m_driver, stat, val) == 0)
742 			val_total += *val;
743 	}
744 	RW_EXIT(&vnetp->vrwlock);
745 
746 	*val = val_total;
747 
748 	DBG1(vnetp, "exit\n");
749 	return (0);
750 }
751 
752 /* wrapper function for mac_register() */
753 static int
754 vnet_mac_register(vnet_t *vnetp)
755 {
756 	mac_register_t	*macp;
757 	int		err;
758 
759 	if ((macp = mac_alloc(MAC_VERSION)) == NULL)
760 		return (DDI_FAILURE);
761 	macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
762 	macp->m_driver = vnetp;
763 	macp->m_dip = vnetp->dip;
764 	macp->m_src_addr = vnetp->curr_macaddr;
765 	macp->m_callbacks = &vnet_m_callbacks;
766 	macp->m_min_sdu = 0;
767 	macp->m_max_sdu = vnetp->mtu;
768 	macp->m_margin = VLAN_TAGSZ;
769 
770 	/*
771 	 * Finally, we're ready to register ourselves with the MAC layer
772 	 * interface; if this succeeds, we're all ready to start()
773 	 */
774 	err = mac_register(macp, &vnetp->mh);
775 	mac_free(macp);
776 	return (err == 0 ? DDI_SUCCESS : DDI_FAILURE);
777 }
778 
779 /* read the mac address of the device */
780 static int
781 vnet_read_mac_address(vnet_t *vnetp)
782 {
783 	uchar_t 	*macaddr;
784 	uint32_t 	size;
785 	int 		rv;
786 
787 	rv = ddi_prop_lookup_byte_array(DDI_DEV_T_ANY, vnetp->dip,
788 	    DDI_PROP_DONTPASS, macaddr_propname, &macaddr, &size);
789 	if ((rv != DDI_PROP_SUCCESS) || (size != ETHERADDRL)) {
790 		DWARN(vnetp, "prop_lookup failed(%s) err(%d)\n",
791 		    macaddr_propname, rv);
792 		return (DDI_FAILURE);
793 	}
794 	bcopy(macaddr, (caddr_t)vnetp->vendor_addr, ETHERADDRL);
795 	bcopy(macaddr, (caddr_t)vnetp->curr_macaddr, ETHERADDRL);
796 	ddi_prop_free(macaddr);
797 
798 	return (DDI_SUCCESS);
799 }
800 
801 static void
802 vnet_fdb_create(vnet_t *vnetp)
803 {
804 	char		hashname[MAXNAMELEN];
805 
806 	(void) snprintf(hashname, MAXNAMELEN, "vnet%d-fdbhash",
807 	    vnetp->instance);
808 	vnetp->fdb_nchains = vnet_fdb_nchains;
809 	vnetp->fdb_hashp = mod_hash_create_ptrhash(hashname, vnetp->fdb_nchains,
810 	    mod_hash_null_valdtor, sizeof (void *));
811 }
812 
813 static void
814 vnet_fdb_destroy(vnet_t *vnetp)
815 {
816 	/* destroy fdb-hash-table */
817 	if (vnetp->fdb_hashp != NULL) {
818 		mod_hash_destroy_hash(vnetp->fdb_hashp);
819 		vnetp->fdb_hashp = NULL;
820 		vnetp->fdb_nchains = 0;
821 	}
822 }
823 
824 /*
825  * Add an entry into the fdb.
826  */
827 void
828 vnet_fdbe_add(vnet_t *vnetp, vnet_res_t *vresp)
829 {
830 	uint64_t	addr = 0;
831 	int		rv;
832 
833 	KEY_HASH(addr, vresp->rem_macaddr);
834 
835 	/*
836 	 * If the entry being added corresponds to LDC_SERVICE resource,
837 	 * that is, vswitch connection, it is added to the hash and also
838 	 * the entry is cached, an additional reference count reflects
839 	 * this. The HYBRID resource is not added to the hash, but only
840 	 * cached, as it is only used for sending out packets for unknown
841 	 * unicast destinations.
842 	 */
843 	(vresp->type == VIO_NET_RES_LDC_SERVICE) ?
844 	    (vresp->refcnt = 1) : (vresp->refcnt = 0);
845 
846 	/*
847 	 * Note: duplicate keys will be rejected by mod_hash.
848 	 */
849 	if (vresp->type != VIO_NET_RES_HYBRID) {
850 		rv = mod_hash_insert(vnetp->fdb_hashp, (mod_hash_key_t)addr,
851 		    (mod_hash_val_t)vresp);
852 		if (rv != 0) {
853 			DWARN(vnetp, "Duplicate macaddr key(%lx)\n", addr);
854 			return;
855 		}
856 	}
857 
858 	if (vresp->type == VIO_NET_RES_LDC_SERVICE) {
859 		/* Cache the fdb entry to vsw-port */
860 		WRITE_ENTER(&vnetp->vsw_fp_rw);
861 		if (vnetp->vsw_fp == NULL)
862 			vnetp->vsw_fp = vresp;
863 		RW_EXIT(&vnetp->vsw_fp_rw);
864 	} else if (vresp->type == VIO_NET_RES_HYBRID) {
865 		/* Cache the fdb entry to hybrid resource */
866 		WRITE_ENTER(&vnetp->vsw_fp_rw);
867 		if (vnetp->hio_fp == NULL)
868 			vnetp->hio_fp = vresp;
869 		RW_EXIT(&vnetp->vsw_fp_rw);
870 	}
871 }
872 
873 /*
874  * Remove an entry from fdb.
875  */
876 static void
877 vnet_fdbe_del(vnet_t *vnetp, vnet_res_t *vresp)
878 {
879 	uint64_t	addr = 0;
880 	int		rv;
881 	uint32_t	refcnt;
882 	vnet_res_t	*tmp;
883 
884 	KEY_HASH(addr, vresp->rem_macaddr);
885 
886 	/*
887 	 * Remove the entry from fdb hash table.
888 	 * This prevents further references to this fdb entry.
889 	 */
890 	if (vresp->type != VIO_NET_RES_HYBRID) {
891 		rv = mod_hash_remove(vnetp->fdb_hashp, (mod_hash_key_t)addr,
892 		    (mod_hash_val_t *)&tmp);
893 		if (rv != 0) {
894 			/*
895 			 * As the resources are added to the hash only
896 			 * after they are started, this can occur if
897 			 * a resource unregisters before it is ever started.
898 			 */
899 			return;
900 		}
901 	}
902 
903 	if (vresp->type == VIO_NET_RES_LDC_SERVICE) {
904 		WRITE_ENTER(&vnetp->vsw_fp_rw);
905 
906 		ASSERT(tmp == vnetp->vsw_fp);
907 		vnetp->vsw_fp = NULL;
908 
909 		RW_EXIT(&vnetp->vsw_fp_rw);
910 	} else if (vresp->type == VIO_NET_RES_HYBRID) {
911 		WRITE_ENTER(&vnetp->vsw_fp_rw);
912 
913 		vnetp->hio_fp = NULL;
914 
915 		RW_EXIT(&vnetp->vsw_fp_rw);
916 	}
917 
918 	/*
919 	 * If there are threads already ref holding before the entry was
920 	 * removed from hash table, then wait for ref count to drop to zero.
921 	 */
922 	(vresp->type == VIO_NET_RES_LDC_SERVICE) ?
923 	    (refcnt = 1) : (refcnt = 0);
924 	while (vresp->refcnt > refcnt) {
925 		delay(drv_usectohz(vnet_fdbe_refcnt_delay));
926 	}
927 }
928 
929 /*
930  * Search fdb for a given mac address. If an entry is found, hold
931  * a reference to it and return the entry; else returns NULL.
932  */
933 static vnet_res_t *
934 vnet_fdbe_find(vnet_t *vnetp, struct ether_addr *addrp)
935 {
936 	uint64_t	key = 0;
937 	vnet_res_t	*vresp;
938 	int		rv;
939 
940 	KEY_HASH(key, addrp->ether_addr_octet);
941 
942 	rv = mod_hash_find_cb(vnetp->fdb_hashp, (mod_hash_key_t)key,
943 	    (mod_hash_val_t *)&vresp, vnet_fdbe_find_cb);
944 
945 	if (rv != 0)
946 		return (NULL);
947 
948 	return (vresp);
949 }
950 
951 /*
952  * Callback function provided to mod_hash_find_cb(). After finding the fdb
953  * entry corresponding to the key (macaddr), this callback will be invoked by
954  * mod_hash_find_cb() to atomically increment the reference count on the fdb
955  * entry before returning the found entry.
956  */
957 static void
958 vnet_fdbe_find_cb(mod_hash_key_t key, mod_hash_val_t val)
959 {
960 	_NOTE(ARGUNUSED(key))
961 	VNET_FDBE_REFHOLD((vnet_res_t *)val);
962 }
963 
964 static void
965 vnet_rx(vio_net_handle_t vrh, mblk_t *mp)
966 {
967 	vnet_res_t *vresp = (vnet_res_t *)vrh;
968 	vnet_t *vnetp = vresp->vnetp;
969 
970 	if ((vnetp != NULL) && (vnetp->mh)) {
971 		mac_rx(vnetp->mh, NULL, mp);
972 	} else {
973 		freemsgchain(mp);
974 	}
975 }
976 
977 void
978 vnet_tx_update(vio_net_handle_t vrh)
979 {
980 	vnet_res_t *vresp = (vnet_res_t *)vrh;
981 	vnet_t *vnetp = vresp->vnetp;
982 
983 	if ((vnetp != NULL) && (vnetp->mh != NULL)) {
984 		mac_tx_update(vnetp->mh);
985 	}
986 }
987 
988 /*
989  * Update the new mtu of vnet into the mac layer. First check if the device has
990  * been plumbed and if so fail the mtu update. Returns 0 on success.
991  */
992 int
993 vnet_mtu_update(vnet_t *vnetp, uint32_t mtu)
994 {
995 	int	rv;
996 
997 	if (vnetp == NULL || vnetp->mh == NULL) {
998 		return (EINVAL);
999 	}
1000 
1001 	WRITE_ENTER(&vnetp->vrwlock);
1002 
1003 	if (vnetp->flags & VNET_STARTED) {
1004 		RW_EXIT(&vnetp->vrwlock);
1005 		cmn_err(CE_NOTE, "!vnet%d: Unable to process mtu "
1006 		    "update as the device is plumbed\n",
1007 		    vnetp->instance);
1008 		return (EBUSY);
1009 	}
1010 
1011 	/* update mtu in the mac layer */
1012 	rv = mac_maxsdu_update(vnetp->mh, mtu);
1013 	if (rv != 0) {
1014 		RW_EXIT(&vnetp->vrwlock);
1015 		cmn_err(CE_NOTE,
1016 		    "!vnet%d: Unable to update mtu with mac layer\n",
1017 		    vnetp->instance);
1018 		return (EIO);
1019 	}
1020 
1021 	vnetp->mtu = mtu;
1022 
1023 	RW_EXIT(&vnetp->vrwlock);
1024 
1025 	return (0);
1026 }
1027 
1028 /*
1029  * vio_net_resource_reg -- An interface called to register a resource
1030  *	with vnet.
1031  *	macp -- a GLDv3 mac_register that has all the details of
1032  *		a resource and its callbacks etc.
1033  *	type -- resource type.
1034  *	local_macaddr -- resource's MAC address. This is used to
1035  *			 associate a resource with a corresponding vnet.
1036  *	remote_macaddr -- remote side MAC address. This is ignored for
1037  *			  the Hybrid resources.
1038  *	vhp -- A handle returned to the caller.
1039  *	vcb -- A set of callbacks provided to the callers.
1040  */
1041 int vio_net_resource_reg(mac_register_t *macp, vio_net_res_type_t type,
1042     ether_addr_t local_macaddr, ether_addr_t rem_macaddr, vio_net_handle_t *vhp,
1043     vio_net_callbacks_t *vcb)
1044 {
1045 	vnet_t	*vnetp;
1046 	vnet_res_t *vresp;
1047 
1048 	vresp = kmem_zalloc(sizeof (vnet_res_t), KM_SLEEP);
1049 	ether_copy(local_macaddr, vresp->local_macaddr);
1050 	ether_copy(rem_macaddr, vresp->rem_macaddr);
1051 	vresp->type = type;
1052 	bcopy(macp, &vresp->macreg, sizeof (mac_register_t));
1053 
1054 	DBG1(NULL, "Resource Registerig type=0%X\n", type);
1055 
1056 	READ_ENTER(&vnet_rw);
1057 	vnetp = vnet_headp;
1058 	while (vnetp != NULL) {
1059 		if (VNET_MATCH_RES(vresp, vnetp)) {
1060 			WRITE_ENTER(&vnetp->vrwlock);
1061 			vresp->vnetp = vnetp;
1062 			vresp->nextp = vnetp->vres_list;
1063 			vnetp->vres_list = vresp;
1064 			RW_EXIT(&vnetp->vrwlock);
1065 			break;
1066 		}
1067 		vnetp = vnetp->nextp;
1068 	}
1069 	RW_EXIT(&vnet_rw);
1070 	if (vresp->vnetp == NULL) {
1071 		DWARN(NULL, "No vnet instance");
1072 		kmem_free(vresp, sizeof (vnet_res_t));
1073 		return (ENXIO);
1074 	}
1075 
1076 	*vhp = vresp;
1077 	vcb->vio_net_rx_cb = vnet_rx;
1078 	vcb->vio_net_tx_update = vnet_tx_update;
1079 	vcb->vio_net_report_err = vnet_handle_res_err;
1080 
1081 	/* Dispatch a task to start resources */
1082 	vnet_dispatch_res_task(vnetp);
1083 	return (0);
1084 }
1085 
1086 /*
1087  * vio_net_resource_unreg -- An interface to unregister a resource.
1088  */
1089 void
1090 vio_net_resource_unreg(vio_net_handle_t vhp)
1091 {
1092 	vnet_res_t *vresp = (vnet_res_t *)vhp;
1093 	vnet_t *vnetp = vresp->vnetp;
1094 	vnet_res_t *vrp;
1095 
1096 	DBG1(NULL, "Resource Registerig hdl=0x%p", vhp);
1097 
1098 	ASSERT(vnetp != NULL);
1099 	vnet_fdbe_del(vnetp, vresp);
1100 
1101 	WRITE_ENTER(&vnetp->vrwlock);
1102 	if (vresp == vnetp->vres_list) {
1103 		vnetp->vres_list = vresp->nextp;
1104 	} else {
1105 		vrp = vnetp->vres_list;
1106 		while (vrp->nextp != NULL) {
1107 			if (vrp->nextp == vresp) {
1108 				vrp->nextp = vresp->nextp;
1109 				break;
1110 			}
1111 			vrp = vrp->nextp;
1112 		}
1113 	}
1114 	vresp->vnetp = NULL;
1115 	vresp->nextp = NULL;
1116 	RW_EXIT(&vnetp->vrwlock);
1117 	KMEM_FREE(vresp);
1118 }
1119 
1120 /*
1121  * vnet_dds_rx -- an interface called by vgen to DDS messages.
1122  */
1123 void
1124 vnet_dds_rx(void *arg, void *dmsg)
1125 {
1126 	vnet_t *vnetp = arg;
1127 	vdds_process_dds_msg(vnetp, dmsg);
1128 }
1129 
1130 /*
1131  * vnet_send_dds_msg -- An interface provided to DDS to send
1132  *	DDS messages. This simply sends meessages via vgen.
1133  */
1134 int
1135 vnet_send_dds_msg(vnet_t *vnetp, void *dmsg)
1136 {
1137 	int rv;
1138 
1139 	if (vnetp->vgenhdl != NULL) {
1140 		rv = vgen_dds_tx(vnetp->vgenhdl, dmsg);
1141 	}
1142 	return (rv);
1143 }
1144 
1145 /*
1146  * vnet_handle_res_err -- A callback function called by a resource
1147  *	to report an error. For example, vgen can call to report
1148  *	an LDC down/reset event. This will trigger cleanup of associated
1149  *	Hybrid resource.
1150  */
1151 /* ARGSUSED */
1152 static void
1153 vnet_handle_res_err(vio_net_handle_t vrh, vio_net_err_val_t err)
1154 {
1155 	vnet_res_t *vresp = (vnet_res_t *)vrh;
1156 	vnet_t *vnetp = vresp->vnetp;
1157 
1158 	if (vnetp == NULL) {
1159 		return;
1160 	}
1161 	if ((vresp->type != VIO_NET_RES_LDC_SERVICE) &&
1162 	    (vresp->type != VIO_NET_RES_HYBRID)) {
1163 		return;
1164 	}
1165 	vdds_cleanup_hybrid_res(vnetp);
1166 }
1167 
1168 /*
1169  * vnet_dispatch_res_task -- A function to dispatch tasks start resources.
1170  */
1171 static void
1172 vnet_dispatch_res_task(vnet_t *vnetp)
1173 {
1174 	int rv;
1175 
1176 	WRITE_ENTER(&vnetp->vrwlock);
1177 	if (vnetp->flags & VNET_STARTED) {
1178 		rv = ddi_taskq_dispatch(vnetp->taskqp, vnet_res_start_task,
1179 		    vnetp, DDI_NOSLEEP);
1180 		if (rv != DDI_SUCCESS) {
1181 			cmn_err(CE_WARN,
1182 			    "vnet%d:Can't dispatch start resource task",
1183 			    vnetp->instance);
1184 		}
1185 	}
1186 	RW_EXIT(&vnetp->vrwlock);
1187 }
1188 
1189 /*
1190  * vnet_res_start_task -- A taskq callback function that starts a resource.
1191  */
1192 static void
1193 vnet_res_start_task(void *arg)
1194 {
1195 	vnet_t *vnetp = arg;
1196 
1197 	WRITE_ENTER(&vnetp->vrwlock);
1198 	if (vnetp->flags & VNET_STARTED) {
1199 		vnet_start_resources(vnetp);
1200 	}
1201 	RW_EXIT(&vnetp->vrwlock);
1202 }
1203 
1204 /*
1205  * vnet_start_resources -- starts all resources associated with
1206  *	a vnet.
1207  */
1208 static void
1209 vnet_start_resources(vnet_t *vnetp)
1210 {
1211 	mac_register_t	*macp;
1212 	mac_callbacks_t	*cbp;
1213 	vnet_res_t	*vresp;
1214 	int rv;
1215 
1216 	DBG1(vnetp, "enter\n");
1217 
1218 	for (vresp = vnetp->vres_list; vresp != NULL; vresp = vresp->nextp) {
1219 		/* skip if it is already started */
1220 		if (vresp->flags & VNET_STARTED) {
1221 			continue;
1222 		}
1223 		macp = &vresp->macreg;
1224 		cbp = macp->m_callbacks;
1225 		rv = cbp->mc_start(macp->m_driver);
1226 		if (rv == 0) {
1227 			/*
1228 			 * Successfully started the resource, so now
1229 			 * add it to the fdb.
1230 			 */
1231 			vresp->flags |= VNET_STARTED;
1232 			vnet_fdbe_add(vnetp, vresp);
1233 		}
1234 	}
1235 
1236 	DBG1(vnetp, "exit\n");
1237 
1238 }
1239 
1240 /*
1241  * vnet_stop_resources -- stop all resources associated with a vnet.
1242  */
1243 static void
1244 vnet_stop_resources(vnet_t *vnetp)
1245 {
1246 	vnet_res_t	*vresp;
1247 	vnet_res_t	*nvresp;
1248 	mac_register_t	*macp;
1249 	mac_callbacks_t	*cbp;
1250 
1251 	DBG1(vnetp, "enter\n");
1252 
1253 	for (vresp = vnetp->vres_list; vresp != NULL; ) {
1254 		nvresp = vresp->nextp;
1255 		if (vresp->flags & VNET_STARTED) {
1256 			macp = &vresp->macreg;
1257 			cbp = macp->m_callbacks;
1258 			cbp->mc_stop(macp->m_driver);
1259 			vresp->flags &= ~VNET_STARTED;
1260 		}
1261 		vresp = nvresp;
1262 	}
1263 	DBG1(vnetp, "exit\n");
1264 }
1265