xref: /illumos-gate/usr/src/uts/sun4v/io/vnet.c (revision 60405de4d8688d96dd05157c28db3ade5c9bc234)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <sys/types.h>
30 #include <sys/errno.h>
31 #include <sys/param.h>
32 #include <sys/stream.h>
33 #include <sys/kmem.h>
34 #include <sys/conf.h>
35 #include <sys/devops.h>
36 #include <sys/ksynch.h>
37 #include <sys/stat.h>
38 #include <sys/modctl.h>
39 #include <sys/debug.h>
40 #include <sys/ethernet.h>
41 #include <sys/dlpi.h>
42 #include <net/if.h>
43 #include <sys/mac.h>
44 #include <sys/mac_ether.h>
45 #include <sys/ddi.h>
46 #include <sys/sunddi.h>
47 #include <sys/strsun.h>
48 #include <sys/note.h>
49 #include <sys/vnet.h>
50 
51 /*
52  * Function prototypes.
53  */
54 
55 /* DDI entrypoints */
56 static int vnetdevinfo(dev_info_t *, ddi_info_cmd_t, void *, void **);
57 static int vnetattach(dev_info_t *, ddi_attach_cmd_t);
58 static int vnetdetach(dev_info_t *, ddi_detach_cmd_t);
59 
60 /* MAC entrypoints  */
61 static int vnet_m_stat(void *, uint_t, uint64_t *);
62 static int vnet_m_start(void *);
63 static void vnet_m_stop(void *);
64 static int vnet_m_promisc(void *, boolean_t);
65 static int vnet_m_multicst(void *, boolean_t, const uint8_t *);
66 static int vnet_m_unicst(void *, const uint8_t *);
67 mblk_t *vnet_m_tx(void *, mblk_t *);
68 
69 /* vnet internal functions */
70 static int vnet_mac_register(vnet_t *);
71 static int vnet_read_mac_address(vnet_t *vnetp);
72 static void vnet_add_vptl(vnet_t *vnetp, vp_tl_t *vp_tlp);
73 static void vnet_del_vptl(vnet_t *vnetp, vp_tl_t *vp_tlp);
74 static vp_tl_t *vnet_get_vptl(vnet_t *vnetp, const char *devname);
75 static fdb_t *vnet_lookup_fdb(fdb_fanout_t *fdbhp, uint8_t *macaddr);
76 
77 /* exported functions */
78 void vnet_add_fdb(void *arg, uint8_t *macaddr, mac_tx_t m_tx, void *txarg);
79 void vnet_del_fdb(void *arg, uint8_t *macaddr);
80 void vnet_modify_fdb(void *arg, uint8_t *macaddr, mac_tx_t m_tx,
81 	void *txarg, boolean_t upgrade);
82 void vnet_add_def_rte(void *arg, mac_tx_t m_tx, void *txarg);
83 void vnet_del_def_rte(void *arg);
84 void vnet_rx(void *arg, mac_resource_handle_t mrh, mblk_t *mp);
85 void vnet_tx_update(void *arg);
86 
87 /* externs */
88 extern int vgen_init(void *vnetp, dev_info_t *vnetdip, const uint8_t *macaddr,
89 	mac_register_t **vgenmacp);
90 extern int vgen_uninit(void *arg);
91 
92 static mac_callbacks_t vnet_m_callbacks = {
93 	0,
94 	vnet_m_stat,
95 	vnet_m_start,
96 	vnet_m_stop,
97 	vnet_m_promisc,
98 	vnet_m_multicst,
99 	vnet_m_unicst,
100 	vnet_m_tx,
101 	NULL,
102 	NULL,
103 	NULL
104 };
105 
106 /*
107  * Linked list of "vnet_t" structures - one per instance.
108  */
109 static vnet_t	*vnet_headp = NULL;
110 static krwlock_t vnet_rw;
111 
112 /* Tunables */
113 uint32_t vnet_ntxds = VNET_NTXDS;	/* power of 2 transmit descriptors */
114 uint32_t vnet_ldcwd_interval = VNET_LDCWD_INTERVAL; /* watchdog freq in msec */
115 uint32_t vnet_ldcwd_txtimeout = VNET_LDCWD_TXTIMEOUT;  /* tx timeout in msec */
116 uint32_t vnet_ldc_mtu = VNET_LDC_MTU;		/* ldc mtu */
117 uint32_t vnet_nfdb_hash = VNET_NFDB_HASH;	/* size of fdb hash table */
118 uint32_t vnet_nrbufs = VNET_NRBUFS;	/* number of receive buffers */
119 
120 /*
121  * Property names
122  */
123 static char macaddr_propname[] = "local-mac-address";
124 
125 /*
126  * This is the string displayed by modinfo(1m).
127  */
128 static char vnet_ident[] = "vnet driver v%I%";
129 extern struct mod_ops mod_driverops;
130 static struct cb_ops cb_vnetops = {
131 	nulldev,		/* cb_open */
132 	nulldev,		/* cb_close */
133 	nodev,			/* cb_strategy */
134 	nodev,			/* cb_print */
135 	nodev,			/* cb_dump */
136 	nodev,			/* cb_read */
137 	nodev,			/* cb_write */
138 	nodev,			/* cb_ioctl */
139 	nodev,			/* cb_devmap */
140 	nodev,			/* cb_mmap */
141 	nodev,			/* cb_segmap */
142 	nochpoll,		/* cb_chpoll */
143 	ddi_prop_op,		/* cb_prop_op */
144 	NULL,			/* cb_stream */
145 	(int)(D_MP)		/* cb_flag */
146 };
147 
148 static struct dev_ops vnetops = {
149 	DEVO_REV,		/* devo_rev */
150 	0,			/* devo_refcnt */
151 	NULL,			/* devo_getinfo */
152 	nulldev,		/* devo_identify */
153 	nulldev,		/* devo_probe */
154 	vnetattach,		/* devo_attach */
155 	vnetdetach,		/* devo_detach */
156 	nodev,			/* devo_reset */
157 	&cb_vnetops,		/* devo_cb_ops */
158 	(struct bus_ops *)NULL	/* devo_bus_ops */
159 };
160 
161 static struct modldrv modldrv = {
162 	&mod_driverops,		/* Type of module.  This one is a driver */
163 	vnet_ident,		/* ID string */
164 	&vnetops		/* driver specific ops */
165 };
166 
167 static struct modlinkage modlinkage = {
168 	MODREV_1, (void *)&modldrv, NULL
169 };
170 
171 
172 /*
173  * Print debug messages - set to 0xf to enable all msgs
174  */
175 int _vnet_dbglevel = 0x8;
176 
177 void
178 _vnetdebug_printf(void *arg, const char *fmt, ...)
179 {
180 	char    buf[512];
181 	va_list ap;
182 	vnet_t *vnetp = (vnet_t *)arg;
183 
184 	va_start(ap, fmt);
185 	(void) vsprintf(buf, fmt, ap);
186 	va_end(ap);
187 
188 	if (vnetp == NULL)
189 		cmn_err(CE_CONT, "%s\n", buf);
190 	else
191 		cmn_err(CE_CONT, "vnet%d: %s\n", vnetp->instance, buf);
192 }
193 
#ifdef DEBUG

/*
 * NOTE: any changes to the definitions below need corresponding changes in
 * vnet_gen.c
 */

/*
 * debug levels:
 * DBG_LEVEL1:	Function entry/exit tracing
 * DBG_LEVEL2:	Info messages
 * DBG_LEVEL3:	Warning messages
 * DBG_LEVEL4:	Error messages
 */

enum	{ DBG_LEVEL1 = 0x01, DBG_LEVEL2 = 0x02, DBG_LEVEL3 = 0x04,
	    DBG_LEVEL4 = 0x08 };

/*
 * Each macro takes one parenthesized argument list, e.g.
 *	DBG1((vnetp, "fmt %d\n", x));
 * which is pasted after _vnetdebug_printf when the matching bit is set in
 * _vnet_dbglevel.
 */
#define	DBG1(_s)	do {						\
			    if ((_vnet_dbglevel & DBG_LEVEL1) != 0) {	\
					_vnetdebug_printf _s;		\
			    }					\
			_NOTE(CONSTCOND) } while (0)

#define	DBG2(_s)	do {						\
			    if ((_vnet_dbglevel & DBG_LEVEL2) != 0) {	\
					_vnetdebug_printf _s;		\
			    }					\
			_NOTE(CONSTCOND) } while (0)

#define	DWARN(_s)	do {						\
			    if ((_vnet_dbglevel & DBG_LEVEL3) != 0) {	\
					_vnetdebug_printf _s;		\
			    }					\
			_NOTE(CONSTCOND) } while (0)

#define	DERR(_s)	do {						\
			    if ((_vnet_dbglevel & DBG_LEVEL4) != 0) {	\
					_vnetdebug_printf _s;		\
			    }					\
			_NOTE(CONSTCOND) } while (0)

#else

/*
 * Non-DEBUG builds: the call is kept under "if (0)" so the arguments are
 * still compiled and type-checked but never executed.  The do/while (0)
 * wrapper makes each macro a single statement, so that
 *	if (cond) DBG1((...)); else ...
 * pairs the else with the caller's if rather than with the hidden "if (0)".
 */
#define	DBG1(_s)	do {						\
			    if (0) {					\
					_vnetdebug_printf _s;		\
			    }					\
			_NOTE(CONSTCOND) } while (0)

#define	DBG2(_s)	do {						\
			    if (0) {					\
					_vnetdebug_printf _s;		\
			    }					\
			_NOTE(CONSTCOND) } while (0)

#define	DWARN(_s)	do {						\
			    if (0) {					\
					_vnetdebug_printf _s;		\
			    }					\
			_NOTE(CONSTCOND) } while (0)

#define	DERR(_s)	do {						\
			    if (0) {					\
					_vnetdebug_printf _s;		\
			    }					\
			_NOTE(CONSTCOND) } while (0)

#endif
244 
245 /* _init(9E): initialize the loadable module */
246 int
247 _init(void)
248 {
249 	int status;
250 
251 	DBG1((NULL, "_init: enter\n"));
252 
253 	mac_init_ops(&vnetops, "vnet");
254 	status = mod_install(&modlinkage);
255 	if (status != 0) {
256 		mac_fini_ops(&vnetops);
257 	}
258 
259 	DBG1((NULL, "_init: exit\n"));
260 	return (status);
261 }
262 
263 /* _fini(9E): prepare the module for unloading. */
264 int
265 _fini(void)
266 {
267 	int status;
268 
269 	DBG1((NULL, "_fini: enter\n"));
270 
271 	status = mod_remove(&modlinkage);
272 	if (status != 0)
273 		return (status);
274 	mac_fini_ops(&vnetops);
275 
276 	DBG1((NULL, "_fini: exit\n"));
277 	return (status);
278 }
279 
280 /* _info(9E): return information about the loadable module */
281 int
282 _info(struct modinfo *modinfop)
283 {
284 	return (mod_info(&modlinkage, modinfop));
285 }
286 
287 /*
288  * attach(9E): attach a device to the system.
289  * called once for each instance of the device on the system.
290  */
291 static int
292 vnetattach(dev_info_t *dip, ddi_attach_cmd_t cmd)
293 {
294 	vnet_t		*vnetp;
295 	vp_tl_t		*vp_tlp;
296 	int		instance;
297 	int		status;
298 	enum		{ AST_init = 0x0, AST_vnet_alloc = 0x1,
299 			    AST_mac_alloc = 0x2, AST_read_macaddr = 0x4,
300 			    AST_vgen_init = 0x8, AST_vptl_alloc = 0x10,
301 			    AST_fdbh_alloc = 0x20 }
302 			attach_state;
303 	mac_register_t	*vgenmacp = NULL;
304 	uint32_t	nfdbh = 0;
305 
306 	attach_state = AST_init;
307 
308 	switch (cmd) {
309 	case DDI_ATTACH:
310 		break;
311 	case DDI_RESUME:
312 	case DDI_PM_RESUME:
313 	default:
314 		goto vnet_attach_fail;
315 	}
316 
317 	instance = ddi_get_instance(dip);
318 	DBG1((NULL, "vnetattach: instance(%d) enter\n", instance));
319 
320 	/* allocate vnet_t and mac_t structures */
321 	vnetp = kmem_zalloc(sizeof (vnet_t), KM_SLEEP);
322 	attach_state |= AST_vnet_alloc;
323 
324 	/* setup links to vnet_t from both devinfo and mac_t */
325 	ddi_set_driver_private(dip, (caddr_t)vnetp);
326 	vnetp->dip = dip;
327 	vnetp->instance = instance;
328 
329 	/* read the mac address */
330 	status = vnet_read_mac_address(vnetp);
331 	if (status != DDI_SUCCESS) {
332 		goto vnet_attach_fail;
333 	}
334 	attach_state |= AST_read_macaddr;
335 
336 	/*
337 	 * Initialize the generic vnet proxy transport. This is the first
338 	 * and default transport used by vnet. The generic transport
339 	 * is provided by using sun4v LDC (logical domain channel). On success,
340 	 * vgen_init() provides a pointer to mac_t of generic transport.
341 	 * Currently, this generic layer provides network connectivity to other
342 	 * vnets within ldoms and also to remote hosts oustide ldoms through
343 	 * the virtual switch (vsw) device on domain0. In the future, when
344 	 * physical adapters that are able to share their resources (such as
345 	 * dma channels) with guest domains become available, the vnet device
346 	 * will use hardware specific driver to communicate directly over the
347 	 * physical device to reach remote hosts without going through vswitch.
348 	 */
349 	status = vgen_init(vnetp, vnetp->dip, (uint8_t *)vnetp->curr_macaddr,
350 	    &vgenmacp);
351 	if (status != DDI_SUCCESS) {
352 		DERR((vnetp, "vgen_init() failed\n"));
353 		goto vnet_attach_fail;
354 	}
355 	attach_state |= AST_vgen_init;
356 
357 	vp_tlp = kmem_zalloc(sizeof (vp_tl_t), KM_SLEEP);
358 	vp_tlp->macp = vgenmacp;
359 	(void) snprintf(vp_tlp->name, MAXNAMELEN, "%s%u", "vgen", instance);
360 	(void) strcpy(vnetp->vgen_name, vp_tlp->name);
361 
362 	/* add generic transport to the list of vnet proxy transports */
363 	vnet_add_vptl(vnetp, vp_tlp);
364 	attach_state |= AST_vptl_alloc;
365 
366 	nfdbh = vnet_nfdb_hash;
367 	if ((nfdbh < VNET_NFDB_HASH) || (nfdbh > VNET_NFDB_HASH_MAX)) {
368 		vnetp->nfdb_hash = VNET_NFDB_HASH;
369 	}
370 	else
371 		vnetp->nfdb_hash = nfdbh;
372 
373 	/* allocate fdb hash table, with an extra slot for default route */
374 	vnetp->fdbhp = kmem_zalloc(sizeof (fdb_fanout_t) *
375 	    (vnetp->nfdb_hash + 1), KM_SLEEP);
376 	attach_state |= AST_fdbh_alloc;
377 
378 	/* register with MAC layer */
379 	status = vnet_mac_register(vnetp);
380 	if (status != DDI_SUCCESS) {
381 		goto vnet_attach_fail;
382 	}
383 
384 	/* add to the list of vnet devices */
385 	WRITE_ENTER(&vnet_rw);
386 	vnetp->nextp = vnet_headp;
387 	vnet_headp = vnetp;
388 	RW_EXIT(&vnet_rw);
389 
390 	DBG1((NULL, "vnetattach: instance(%d) exit\n", instance));
391 	return (DDI_SUCCESS);
392 
393 vnet_attach_fail:
394 	if (attach_state & AST_fdbh_alloc) {
395 		kmem_free(vnetp->fdbhp,
396 		    sizeof (fdb_fanout_t) * (vnetp->nfdb_hash + 1));
397 	}
398 	if (attach_state & AST_vptl_alloc) {
399 		WRITE_ENTER(&vnetp->trwlock);
400 		vnet_del_vptl(vnetp, vp_tlp);
401 		RW_EXIT(&vnetp->trwlock);
402 	}
403 	if (attach_state & AST_vgen_init) {
404 		(void) vgen_uninit(vgenmacp->m_driver);
405 	}
406 	if (attach_state & AST_vnet_alloc) {
407 		KMEM_FREE(vnetp);
408 	}
409 	return (DDI_FAILURE);
410 }
411 
412 /*
413  * detach(9E): detach a device from the system.
414  */
415 static int
416 vnetdetach(dev_info_t *dip, ddi_detach_cmd_t cmd)
417 {
418 	vnet_t		*vnetp;
419 	vnet_t		**vnetpp;
420 	vp_tl_t		*vp_tlp;
421 	int		instance;
422 	int		rv;
423 
424 	instance = ddi_get_instance(dip);
425 	DBG1((NULL, "vnetdetach: instance(%d) enter\n", instance));
426 
427 	vnetp = ddi_get_driver_private(dip);
428 	if (vnetp == NULL) {
429 		goto vnet_detach_fail;
430 	}
431 
432 	switch (cmd) {
433 	case DDI_DETACH:
434 		break;
435 	case DDI_SUSPEND:
436 	case DDI_PM_SUSPEND:
437 	default:
438 		goto vnet_detach_fail;
439 	}
440 
441 	/* uninit and free vnet proxy transports */
442 	WRITE_ENTER(&vnetp->trwlock);
443 	while ((vp_tlp = vnetp->tlp) != NULL) {
444 		if (strcmp(vnetp->vgen_name, vp_tlp->name) == 0) {
445 			/* uninitialize generic transport */
446 			rv = vgen_uninit(vp_tlp->macp->m_driver);
447 			if (rv != DDI_SUCCESS) {
448 				RW_EXIT(&vnetp->trwlock);
449 				goto vnet_detach_fail;
450 			}
451 		}
452 		vnet_del_vptl(vnetp, vp_tlp);
453 	}
454 	RW_EXIT(&vnetp->trwlock);
455 
456 	/*
457 	 * Unregister from the MAC subsystem.  This can fail, in
458 	 * particular if there are DLPI style-2 streams still open -
459 	 * in which case we just return failure.
460 	 */
461 	if (mac_unregister(vnetp->mh) != 0)
462 		goto vnet_detach_fail;
463 
464 	/* unlink from instance(vnet_t) list */
465 	WRITE_ENTER(&vnet_rw);
466 	for (vnetpp = &vnet_headp; *vnetpp; vnetpp = &(*vnetpp)->nextp) {
467 		if (*vnetpp == vnetp) {
468 			*vnetpp = vnetp->nextp;
469 			break;
470 		}
471 	}
472 	RW_EXIT(&vnet_rw);
473 
474 	KMEM_FREE(vnetp);
475 
476 	return (DDI_SUCCESS);
477 
478 vnet_detach_fail:
479 	return (DDI_FAILURE);
480 }
481 
482 /* enable the device for transmit/receive */
483 static int
484 vnet_m_start(void *arg)
485 {
486 	vnet_t		*vnetp = arg;
487 	vp_tl_t		*vp_tlp;
488 	mac_register_t	*vp_macp;
489 	mac_callbacks_t	*cbp;
490 
491 	DBG1((vnetp, "vnet_m_start: enter\n"));
492 
493 	/*
494 	 * NOTE:
495 	 * Currently, we only have generic transport. m_start() invokes
496 	 * vgen_start() which enables ports/channels in vgen and
497 	 * initiates handshake with peer vnets and vsw. In the future when we
498 	 * have support for hardware specific transports, this information
499 	 * needs to be propagted back to vnet from vgen and we need to revisit
500 	 * this code (see comments in vnet_attach()).
501 	 *
502 	 */
503 	WRITE_ENTER(&vnetp->trwlock);
504 	for (vp_tlp = vnetp->tlp; vp_tlp != NULL; vp_tlp = vp_tlp->nextp) {
505 		vp_macp = vp_tlp->macp;
506 		cbp = vp_macp->m_callbacks;
507 		cbp->mc_start(vp_macp->m_driver);
508 	}
509 	RW_EXIT(&vnetp->trwlock);
510 
511 	DBG1((vnetp, "vnet_m_start: exit\n"));
512 	return (VNET_SUCCESS);
513 
514 }
515 
516 /* stop transmit/receive for the device */
517 static void
518 vnet_m_stop(void *arg)
519 {
520 	vnet_t		*vnetp = arg;
521 	vp_tl_t		*vp_tlp;
522 	mac_register_t	*vp_macp;
523 	mac_callbacks_t	*cbp;
524 
525 	DBG1((vnetp, "vnet_m_stop: enter\n"));
526 
527 	WRITE_ENTER(&vnetp->trwlock);
528 	for (vp_tlp = vnetp->tlp; vp_tlp != NULL; vp_tlp = vp_tlp->nextp) {
529 		vp_macp = vp_tlp->macp;
530 		cbp = vp_macp->m_callbacks;
531 		cbp->mc_stop(vp_macp->m_driver);
532 	}
533 	RW_EXIT(&vnetp->trwlock);
534 
535 	DBG1((vnetp, "vnet_m_stop: exit\n"));
536 }
537 
538 /* set the unicast mac address of the device */
539 static int
540 vnet_m_unicst(void *arg, const uint8_t *macaddr)
541 {
542 	_NOTE(ARGUNUSED(macaddr))
543 
544 	vnet_t *vnetp = arg;
545 
546 	DBG1((vnetp, "vnet_m_unicst: enter\n"));
547 	/*
548 	 * NOTE: setting mac address dynamically is not supported.
549 	 */
550 	DBG1((vnetp, "vnet_m_unicst: exit\n"));
551 
552 	return (VNET_FAILURE);
553 }
554 
555 /* enable/disable a multicast address */
556 static int
557 vnet_m_multicst(void *arg, boolean_t add, const uint8_t *mca)
558 {
559 	_NOTE(ARGUNUSED(add, mca))
560 
561 	vnet_t *vnetp = arg;
562 	vp_tl_t		*vp_tlp;
563 	mac_register_t	*vp_macp;
564 	mac_callbacks_t	*cbp;
565 	int rv = VNET_SUCCESS;
566 
567 	DBG1((vnetp, "vnet_m_multicst: enter\n"));
568 	READ_ENTER(&vnetp->trwlock);
569 	for (vp_tlp = vnetp->tlp; vp_tlp != NULL; vp_tlp = vp_tlp->nextp) {
570 		if (strcmp(vnetp->vgen_name, vp_tlp->name) == 0) {
571 			vp_macp = vp_tlp->macp;
572 			cbp = vp_macp->m_callbacks;
573 			rv = cbp->mc_multicst(vp_macp->m_driver, add, mca);
574 			break;
575 		}
576 	}
577 	RW_EXIT(&vnetp->trwlock);
578 	DBG1((vnetp, "vnet_m_multicst: exit\n"));
579 	return (rv);
580 }
581 
582 /* set or clear promiscuous mode on the device */
583 static int
584 vnet_m_promisc(void *arg, boolean_t on)
585 {
586 	_NOTE(ARGUNUSED(on))
587 
588 	vnet_t *vnetp = arg;
589 	DBG1((vnetp, "vnet_m_promisc: enter\n"));
590 	/*
591 	 * NOTE: setting promiscuous mode is not supported, just return success.
592 	 */
593 	DBG1((vnetp, "vnet_m_promisc: exit\n"));
594 	return (VNET_SUCCESS);
595 }
596 
597 /*
598  * Transmit a chain of packets. This function provides switching functionality
599  * based on the destination mac address to reach other guests (within ldoms) or
600  * external hosts.
601  */
602 mblk_t *
603 vnet_m_tx(void *arg, mblk_t *mp)
604 {
605 	vnet_t *vnetp;
606 	mblk_t *next;
607 	uint32_t fdbhash;
608 	fdb_t *fdbp;
609 	fdb_fanout_t *fdbhp;
610 	struct ether_header *ehp;
611 	uint8_t *macaddr;
612 	mblk_t *resid_mp;
613 
614 	vnetp = (vnet_t *)arg;
615 	DBG1((vnetp, "vnet_m_tx: enter\n"));
616 	ASSERT(mp != NULL);
617 
618 	while (mp != NULL) {
619 		next = mp->b_next;
620 		mp->b_next = NULL;
621 
622 		/* get the destination mac address in the eth header */
623 		ehp = (struct ether_header *)mp->b_rptr;
624 		macaddr = (uint8_t *)&ehp->ether_dhost;
625 
626 		/* Calculate hash value and fdb fanout */
627 		fdbhash = MACHASH(macaddr, vnetp->nfdb_hash);
628 		fdbhp = &(vnetp->fdbhp[fdbhash]);
629 
630 		READ_ENTER(&fdbhp->rwlock);
631 		fdbp = vnet_lookup_fdb(fdbhp, macaddr);
632 		if (fdbp) {
633 			/*
634 			 * If the destination is in FDB, the destination is
635 			 * a vnet device within ldoms and directly reachable,
636 			 * invoke the tx function in the fdb entry.
637 			 */
638 			resid_mp = fdbp->m_tx(fdbp->txarg, mp);
639 			if (resid_mp != NULL) {
640 				/* m_tx failed */
641 				mp->b_next = next;
642 				RW_EXIT(&fdbhp->rwlock);
643 				break;
644 			}
645 			RW_EXIT(&fdbhp->rwlock);
646 		} else {
647 			/* destination is not in FDB */
648 			RW_EXIT(&fdbhp->rwlock);
649 			/*
650 			 * If the destination is broadcast/multicast
651 			 * or an unknown unicast address, forward the
652 			 * packet to vsw, using the last slot in fdb which is
653 			 * reserved for default route.
654 			 */
655 			fdbhp = &(vnetp->fdbhp[vnetp->nfdb_hash]);
656 			READ_ENTER(&fdbhp->rwlock);
657 			fdbp = fdbhp->headp;
658 			if (fdbp) {
659 				resid_mp = fdbp->m_tx(fdbp->txarg, mp);
660 				if (resid_mp != NULL) {
661 					/* m_tx failed */
662 					mp->b_next = next;
663 					RW_EXIT(&fdbhp->rwlock);
664 					break;
665 				}
666 			} else {
667 				/* drop the packet */
668 				freemsg(mp);
669 			}
670 			RW_EXIT(&fdbhp->rwlock);
671 		}
672 
673 		mp = next;
674 	}
675 
676 	DBG1((vnetp, "vnet_m_tx: exit\n"));
677 	return (mp);
678 }
679 
680 /* get statistics from the device */
681 int
682 vnet_m_stat(void *arg, uint_t stat, uint64_t *val)
683 {
684 	vnet_t *vnetp = arg;
685 	vp_tl_t	*vp_tlp;
686 	mac_register_t	*vp_macp;
687 	mac_callbacks_t	*cbp;
688 	uint64_t val_total = 0;
689 
690 	DBG1((vnetp, "vnet_m_stat: enter\n"));
691 
692 	/*
693 	 * get the specified statistic from each transport and return the
694 	 * aggregate val.  This obviously only works for counters.
695 	 */
696 	if ((IS_MAC_STAT(stat) && !MAC_STAT_ISACOUNTER(stat)) ||
697 	    (IS_MACTYPE_STAT(stat) && !ETHER_STAT_ISACOUNTER(stat))) {
698 		return (ENOTSUP);
699 	}
700 	READ_ENTER(&vnetp->trwlock);
701 	for (vp_tlp = vnetp->tlp; vp_tlp != NULL; vp_tlp = vp_tlp->nextp) {
702 		vp_macp = vp_tlp->macp;
703 		cbp = vp_macp->m_callbacks;
704 		if (cbp->mc_getstat(vp_macp->m_driver, stat, val) == 0)
705 			val_total += *val;
706 	}
707 	RW_EXIT(&vnetp->trwlock);
708 
709 	*val = val_total;
710 
711 	DBG1((vnetp, "vnet_m_stat: exit\n"));
712 	return (0);
713 }
714 
715 /* wrapper function for mac_register() */
716 static int
717 vnet_mac_register(vnet_t *vnetp)
718 {
719 	mac_register_t	*macp;
720 	int		err;
721 
722 	if ((macp = mac_alloc(MAC_VERSION)) == NULL)
723 		return (DDI_FAILURE);
724 	macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
725 	macp->m_driver = vnetp;
726 	macp->m_dip = vnetp->dip;
727 	macp->m_src_addr = vnetp->curr_macaddr;
728 	macp->m_callbacks = &vnet_m_callbacks;
729 	macp->m_min_sdu = 0;
730 	macp->m_max_sdu = ETHERMTU;
731 
732 	/*
733 	 * Finally, we're ready to register ourselves with the MAC layer
734 	 * interface; if this succeeds, we're all ready to start()
735 	 */
736 	err = mac_register(macp, &vnetp->mh);
737 	mac_free(macp);
738 	return (err == 0 ? DDI_SUCCESS : DDI_FAILURE);
739 }
740 
741 /* add vp_tl to the list */
742 static void
743 vnet_add_vptl(vnet_t *vnetp, vp_tl_t *vp_tlp)
744 {
745 	vp_tl_t *ttlp;
746 
747 	WRITE_ENTER(&vnetp->trwlock);
748 	if (vnetp->tlp == NULL) {
749 		vnetp->tlp = vp_tlp;
750 	} else {
751 		ttlp = vnetp->tlp;
752 		while (ttlp->nextp)
753 			ttlp = ttlp->nextp;
754 		ttlp->nextp = vp_tlp;
755 	}
756 	RW_EXIT(&vnetp->trwlock);
757 }
758 
759 /* remove vp_tl from the list */
760 static void
761 vnet_del_vptl(vnet_t *vnetp, vp_tl_t *vp_tlp)
762 {
763 	vp_tl_t *ttlp, **pretlp;
764 	boolean_t found = B_FALSE;
765 
766 	pretlp = &vnetp->tlp;
767 	ttlp = *pretlp;
768 	while (ttlp) {
769 		if (ttlp == vp_tlp) {
770 			found = B_TRUE;
771 			(*pretlp) = ttlp->nextp;
772 			ttlp->nextp = NULL;
773 			break;
774 		}
775 		pretlp = &(ttlp->nextp);
776 		ttlp = *pretlp;
777 	}
778 
779 	if (found) {
780 		KMEM_FREE(vp_tlp);
781 	}
782 }
783 
784 /* get vp_tl corresponding to the given name */
785 static vp_tl_t *
786 vnet_get_vptl(vnet_t *vnetp, const char *name)
787 {
788 	vp_tl_t *tlp;
789 
790 	tlp = vnetp->tlp;
791 	while (tlp) {
792 		if (strcmp(tlp->name, name) == 0) {
793 			return (tlp);
794 		}
795 		tlp = tlp->nextp;
796 	}
797 	DWARN((vnetp,
798 	    "vnet_get_vptl: can't find vp_tl with name (%s)\n", name));
799 	return (NULL);
800 }
801 
802 /* read the mac address of the device */
803 static int
804 vnet_read_mac_address(vnet_t *vnetp)
805 {
806 	uchar_t 	*macaddr;
807 	uint32_t 	size;
808 	int 		rv;
809 
810 	rv = ddi_prop_lookup_byte_array(DDI_DEV_T_ANY, vnetp->dip,
811 		DDI_PROP_DONTPASS, macaddr_propname, &macaddr, &size);
812 	if ((rv != DDI_PROP_SUCCESS) || (size != ETHERADDRL)) {
813 		DWARN((vnetp,
814 		"vnet_read_mac_address: prop_lookup failed (%s) err (%d)\n",
815 		macaddr_propname, rv));
816 		return (DDI_FAILURE);
817 	}
818 	bcopy(macaddr, (caddr_t)vnetp->vendor_addr, ETHERADDRL);
819 	bcopy(macaddr, (caddr_t)vnetp->curr_macaddr, ETHERADDRL);
820 	ddi_prop_free(macaddr);
821 
822 	return (DDI_SUCCESS);
823 }
824 
825 
826 /*
827  * Functions below are called only by generic transport to add/remove/modify
828  * entries in forwarding database. See comments in vgen_port_init(vnet_gen.c).
829  */
830 
831 /* add an entry into the forwarding database */
832 void
833 vnet_add_fdb(void *arg, uint8_t *macaddr, mac_tx_t m_tx, void *txarg)
834 {
835 	vnet_t *vnetp = (vnet_t *)arg;
836 	uint32_t fdbhash;
837 	fdb_t *fdbp;
838 	fdb_fanout_t *fdbhp;
839 
840 	/* Calculate hash value and fdb fanout */
841 	fdbhash = MACHASH(macaddr, vnetp->nfdb_hash);
842 	fdbhp = &(vnetp->fdbhp[fdbhash]);
843 
844 	WRITE_ENTER(&fdbhp->rwlock);
845 
846 	fdbp = kmem_zalloc(sizeof (fdb_t), KM_NOSLEEP);
847 	if (fdbp == NULL) {
848 		RW_EXIT(&fdbhp->rwlock);
849 		return;
850 	}
851 	bcopy(macaddr, (caddr_t)fdbp->macaddr, ETHERADDRL);
852 	fdbp->m_tx = m_tx;
853 	fdbp->txarg = txarg;
854 	fdbp->nextp = fdbhp->headp;
855 	fdbhp->headp = fdbp;
856 
857 	RW_EXIT(&fdbhp->rwlock);
858 }
859 
860 /* delete an entry from the forwarding database */
861 void
862 vnet_del_fdb(void *arg, uint8_t *macaddr)
863 {
864 	vnet_t *vnetp = (vnet_t *)arg;
865 	uint32_t fdbhash;
866 	fdb_t *fdbp;
867 	fdb_t **pfdbp;
868 	fdb_fanout_t *fdbhp;
869 
870 	/* Calculate hash value and fdb fanout */
871 	fdbhash = MACHASH(macaddr, vnetp->nfdb_hash);
872 	fdbhp = &(vnetp->fdbhp[fdbhash]);
873 
874 	WRITE_ENTER(&fdbhp->rwlock);
875 
876 	for (pfdbp = &fdbhp->headp; (fdbp  = *pfdbp) != NULL;
877 	    pfdbp = &fdbp->nextp) {
878 		if (bcmp(fdbp->macaddr, macaddr, ETHERADDRL) == 0) {
879 			/* Unlink it from the list */
880 			*pfdbp = fdbp->nextp;
881 			KMEM_FREE(fdbp);
882 			break;
883 		}
884 	}
885 
886 	RW_EXIT(&fdbhp->rwlock);
887 }
888 
889 /* modify an existing entry in the forwarding database */
890 void
891 vnet_modify_fdb(void *arg, uint8_t *macaddr, mac_tx_t m_tx, void *txarg,
892 	boolean_t upgrade)
893 {
894 	vnet_t *vnetp = (vnet_t *)arg;
895 	uint32_t fdbhash;
896 	fdb_t *fdbp;
897 	fdb_fanout_t *fdbhp;
898 
899 	/* Calculate hash value and fdb fanout */
900 	fdbhash = MACHASH(macaddr, vnetp->nfdb_hash);
901 	fdbhp = &(vnetp->fdbhp[fdbhash]);
902 
903 	if (upgrade == B_TRUE) {
904 		/*
905 		 * Caller already holds the lock as a reader. This can
906 		 * occur if this function is invoked in the context
907 		 * of transmit routine - vnet_m_tx(), where the lock
908 		 * is held as a reader before calling the transmit
909 		 * function of an fdb entry (fdbp->m_tx).
910 		 * See comments in vgen_ldcsend() in vnet_gen.c
911 		 */
912 		if (!rw_tryupgrade(&fdbhp->rwlock)) {
913 			RW_EXIT(&fdbhp->rwlock);
914 			WRITE_ENTER(&fdbhp->rwlock);
915 		}
916 	} else {
917 		/* Caller does not hold the lock */
918 		WRITE_ENTER(&fdbhp->rwlock);
919 	}
920 
921 	for (fdbp = fdbhp->headp; fdbp != NULL; fdbp = fdbp->nextp) {
922 		if (bcmp(fdbp->macaddr, macaddr, ETHERADDRL) == 0) {
923 			/* change the entry to have new tx params */
924 			fdbp->m_tx = m_tx;
925 			fdbp->txarg = txarg;
926 			break;
927 		}
928 	}
929 
930 	if (upgrade == B_TRUE) {
931 		/* restore the caller as a reader */
932 		rw_downgrade(&fdbhp->rwlock);
933 	} else {
934 		RW_EXIT(&fdbhp->rwlock);
935 	}
936 }
937 
938 /* look up an fdb entry based on the mac address, caller holds lock */
939 static fdb_t *
940 vnet_lookup_fdb(fdb_fanout_t *fdbhp, uint8_t *macaddr)
941 {
942 	fdb_t *fdbp = NULL;
943 
944 	for (fdbp = fdbhp->headp; fdbp != NULL; fdbp = fdbp->nextp) {
945 		if (bcmp(fdbp->macaddr, macaddr, ETHERADDRL) == 0) {
946 			break;
947 		}
948 	}
949 
950 	return (fdbp);
951 }
952 
953 /* add default route entry into the forwarding database */
954 void
955 vnet_add_def_rte(void *arg, mac_tx_t m_tx, void *txarg)
956 {
957 	vnet_t *vnetp = (vnet_t *)arg;
958 	fdb_t *fdbp;
959 	fdb_fanout_t *fdbhp;
960 
961 	/*
962 	 * The last hash list is reserved for default route entry,
963 	 * and for now, we have only one entry in this list.
964 	 */
965 	fdbhp = &(vnetp->fdbhp[vnetp->nfdb_hash]);
966 
967 	WRITE_ENTER(&fdbhp->rwlock);
968 
969 	if (fdbhp->headp) {
970 		DWARN((vnetp,
971 		    "vnet_add_def_rte: default rte already exists\n"));
972 		RW_EXIT(&fdbhp->rwlock);
973 		return;
974 	}
975 	fdbp = kmem_zalloc(sizeof (fdb_t), KM_NOSLEEP);
976 	if (fdbp == NULL) {
977 		RW_EXIT(&fdbhp->rwlock);
978 		return;
979 	}
980 	bzero(fdbp->macaddr, ETHERADDRL);
981 	fdbp->m_tx = m_tx;
982 	fdbp->txarg = txarg;
983 	fdbp->nextp = NULL;
984 	fdbhp->headp = fdbp;
985 
986 	RW_EXIT(&fdbhp->rwlock);
987 }
988 
989 /* delete default route entry from the forwarding database */
990 void
991 vnet_del_def_rte(void *arg)
992 {
993 	vnet_t *vnetp = (vnet_t *)arg;
994 	fdb_t *fdbp;
995 	fdb_fanout_t *fdbhp;
996 
997 	/*
998 	 * The last hash list is reserved for default route entry,
999 	 * and for now, we have only one entry in this list.
1000 	 */
1001 	fdbhp = &(vnetp->fdbhp[vnetp->nfdb_hash]);
1002 
1003 	WRITE_ENTER(&fdbhp->rwlock);
1004 
1005 	if (fdbhp->headp == NULL) {
1006 		RW_EXIT(&fdbhp->rwlock);
1007 		return;
1008 	}
1009 	fdbp = fdbhp->headp;
1010 	KMEM_FREE(fdbp);
1011 	fdbhp->headp = NULL;
1012 
1013 	RW_EXIT(&fdbhp->rwlock);
1014 }
1015 
1016 void
1017 vnet_rx(void *arg, mac_resource_handle_t mrh, mblk_t *mp)
1018 {
1019 	vnet_t *vnetp = arg;
1020 	mac_rx(vnetp->mh, mrh, mp);
1021 }
1022 
1023 void
1024 vnet_tx_update(void *arg)
1025 {
1026 	vnet_t *vnetp = arg;
1027 	mac_tx_update(vnetp->mh);
1028 }
1029