xref: /titanic_51/usr/src/uts/sun4v/io/vnet.c (revision 5363b1129db4ee42d2c9736898eab4670580bec7)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <sys/types.h>
30 #include <sys/errno.h>
31 #include <sys/param.h>
32 #include <sys/stream.h>
33 #include <sys/kmem.h>
34 #include <sys/conf.h>
35 #include <sys/devops.h>
36 #include <sys/ksynch.h>
37 #include <sys/stat.h>
38 #include <sys/modctl.h>
39 #include <sys/debug.h>
40 #include <sys/ethernet.h>
41 #include <sys/dlpi.h>
42 #include <net/if.h>
43 #include <sys/mac.h>
44 #include <sys/mac_ether.h>
45 #include <sys/ddi.h>
46 #include <sys/sunddi.h>
47 #include <sys/strsun.h>
48 #include <sys/note.h>
49 #include <sys/vnet.h>
50 
51 /*
52  * Function prototypes.
53  */
54 
55 /* DDI entrypoints */
56 static int vnetdevinfo(dev_info_t *, ddi_info_cmd_t, void *, void **);
57 static int vnetattach(dev_info_t *, ddi_attach_cmd_t);
58 static int vnetdetach(dev_info_t *, ddi_detach_cmd_t);
59 
60 /* MAC entrypoints  */
61 static int vnet_m_stat(void *, uint_t, uint64_t *);
62 static int vnet_m_start(void *);
63 static void vnet_m_stop(void *);
64 static int vnet_m_promisc(void *, boolean_t);
65 static int vnet_m_multicst(void *, boolean_t, const uint8_t *);
66 static int vnet_m_unicst(void *, const uint8_t *);
67 mblk_t *vnet_m_tx(void *, mblk_t *);
68 
69 /* vnet internal functions */
70 static int vnet_mac_register(vnet_t *);
71 static int vnet_read_mac_address(vnet_t *vnetp);
72 static void vnet_add_vptl(vnet_t *vnetp, vp_tl_t *vp_tlp);
73 static void vnet_del_vptl(vnet_t *vnetp, vp_tl_t *vp_tlp);
74 static vp_tl_t *vnet_get_vptl(vnet_t *vnetp, const char *devname);
75 static fdb_t *vnet_lookup_fdb(fdb_fanout_t *fdbhp, uint8_t *macaddr);
76 
77 /* exported functions */
78 void vnet_add_fdb(void *arg, uint8_t *macaddr, mac_tx_t m_tx, void *txarg);
79 void vnet_del_fdb(void *arg, uint8_t *macaddr);
80 void vnet_modify_fdb(void *arg, uint8_t *macaddr, mac_tx_t m_tx,
81 	void *txarg, boolean_t upgrade);
82 void vnet_add_def_rte(void *arg, mac_tx_t m_tx, void *txarg);
83 void vnet_del_def_rte(void *arg);
84 void vnet_rx(void *arg, mac_resource_handle_t mrh, mblk_t *mp);
85 void vnet_tx_update(void *arg);
86 
87 /* externs */
88 extern int vgen_init(void *vnetp, dev_info_t *vnetdip, const uint8_t *macaddr,
89 	mac_register_t **vgenmacp);
90 extern int vgen_uninit(void *arg);
91 
92 static mac_callbacks_t vnet_m_callbacks = {
93 	0,
94 	vnet_m_stat,
95 	vnet_m_start,
96 	vnet_m_stop,
97 	vnet_m_promisc,
98 	vnet_m_multicst,
99 	vnet_m_unicst,
100 	vnet_m_tx,
101 	NULL,
102 	NULL,
103 	NULL
104 };
105 
106 /*
107  * Linked list of "vnet_t" structures - one per instance.
108  */
109 static vnet_t	*vnet_headp = NULL;
110 static krwlock_t vnet_rw;
111 
112 /* Tunables */
113 uint32_t vnet_ntxds = VNET_NTXDS;	/* power of 2 transmit descriptors */
114 uint32_t vnet_ldcwd_interval = VNET_LDCWD_INTERVAL; /* watchdog freq in msec */
115 uint32_t vnet_ldcwd_txtimeout = VNET_LDCWD_TXTIMEOUT;  /* tx timeout in msec */
116 uint32_t vnet_ldc_mtu = VNET_LDC_MTU;		/* ldc mtu */
117 uint32_t vnet_nfdb_hash = VNET_NFDB_HASH;	/* size of fdb hash table */
118 uint32_t vnet_nrbufs = VNET_NRBUFS;	/* number of receive buffers */
119 
120 /*
121  * Property names
122  */
123 static char macaddr_propname[] = "local-mac-address";
124 
125 /*
126  * This is the string displayed by modinfo(1m).
127  */
128 static char vnet_ident[] = "vnet driver v%I%";
129 extern struct mod_ops mod_driverops;
130 static struct cb_ops cb_vnetops = {
131 	nulldev,		/* cb_open */
132 	nulldev,		/* cb_close */
133 	nodev,			/* cb_strategy */
134 	nodev,			/* cb_print */
135 	nodev,			/* cb_dump */
136 	nodev,			/* cb_read */
137 	nodev,			/* cb_write */
138 	nodev,			/* cb_ioctl */
139 	nodev,			/* cb_devmap */
140 	nodev,			/* cb_mmap */
141 	nodev,			/* cb_segmap */
142 	nochpoll,		/* cb_chpoll */
143 	ddi_prop_op,		/* cb_prop_op */
144 	NULL,			/* cb_stream */
145 	(int)(D_MP)		/* cb_flag */
146 };
147 
148 static struct dev_ops vnetops = {
149 	DEVO_REV,		/* devo_rev */
150 	0,			/* devo_refcnt */
151 	NULL,			/* devo_getinfo */
152 	nulldev,		/* devo_identify */
153 	nulldev,		/* devo_probe */
154 	vnetattach,		/* devo_attach */
155 	vnetdetach,		/* devo_detach */
156 	nodev,			/* devo_reset */
157 	&cb_vnetops,		/* devo_cb_ops */
158 	(struct bus_ops *)NULL	/* devo_bus_ops */
159 };
160 
161 static struct modldrv modldrv = {
162 	&mod_driverops,		/* Type of module.  This one is a driver */
163 	vnet_ident,		/* ID string */
164 	&vnetops		/* driver specific ops */
165 };
166 
167 static struct modlinkage modlinkage = {
168 	MODREV_1, (void *)&modldrv, NULL
169 };
170 
171 
172 /*
173  * Print debug messages - set to 0xf to enable all msgs
174  */
175 int _vnet_dbglevel = 0x8;
176 
177 void
178 _vnetdebug_printf(void *arg, const char *fmt, ...)
179 {
180 	char    buf[512];
181 	va_list ap;
182 	vnet_t *vnetp = (vnet_t *)arg;
183 
184 	va_start(ap, fmt);
185 	(void) vsprintf(buf, fmt, ap);
186 	va_end(ap);
187 
188 	if (vnetp == NULL)
189 		cmn_err(CE_CONT, "%s\n", buf);
190 	else
191 		cmn_err(CE_CONT, "vnet%d: %s\n", vnetp->instance, buf);
192 }
193 
#ifdef DEBUG

/*
 * NOTE: any changes to the definitions below need corresponding changes in
 * vnet_gen.c
 */

/*
 * debug levels:
 * DBG_LEVEL1:	Function entry/exit tracing
 * DBG_LEVEL2:	Info messages
 * DBG_LEVEL3:	Warning messages
 * DBG_LEVEL4:	Error messages
 */

enum	{ DBG_LEVEL1 = 0x01, DBG_LEVEL2 = 0x02, DBG_LEVEL3 = 0x04,
	    DBG_LEVEL4 = 0x08 };

/*
 * Each macro takes a fully parenthesized _vnetdebug_printf() argument list,
 * e.g. DBG1((vnetp, "fmt %d\n", x)), and prints it only when the matching
 * bit is set in _vnet_dbglevel.
 */
#define	DBG1(_s)	do {						\
			    if ((_vnet_dbglevel & DBG_LEVEL1) != 0) {	\
					_vnetdebug_printf _s;		\
			    }					\
			_NOTE(CONSTCOND) } while (0)

#define	DBG2(_s)	do {						\
			    if ((_vnet_dbglevel & DBG_LEVEL2) != 0) {	\
					_vnetdebug_printf _s;		\
			    }					\
			_NOTE(CONSTCOND) } while (0)

#define	DWARN(_s)	do {						\
			    if ((_vnet_dbglevel & DBG_LEVEL3) != 0) {	\
					_vnetdebug_printf _s;		\
			    }					\
			_NOTE(CONSTCOND) } while (0)

#define	DERR(_s)	do {						\
			    if ((_vnet_dbglevel & DBG_LEVEL4) != 0) {	\
					_vnetdebug_printf _s;		\
			    }					\
			_NOTE(CONSTCOND) } while (0)

#else

/*
 * Non-DEBUG builds: the call stays inside a never-taken branch so the
 * arguments are still parsed but nothing is ever printed.  The branch is
 * wrapped in do { } while (0), like the DEBUG variants, so that each macro
 * is a single statement; a bare "if (0) ..." would silently capture the
 * "else" of an enclosing unbraced if/else.
 */
#define	DBG1(_s)	do {						\
			    if (0) {					\
					_vnetdebug_printf _s;		\
			    }					\
			_NOTE(CONSTCOND) } while (0)

#define	DBG2(_s)	do {						\
			    if (0) {					\
					_vnetdebug_printf _s;		\
			    }					\
			_NOTE(CONSTCOND) } while (0)

#define	DWARN(_s)	do {						\
			    if (0) {					\
					_vnetdebug_printf _s;		\
			    }					\
			_NOTE(CONSTCOND) } while (0)

#define	DERR(_s)	do {						\
			    if (0) {					\
					_vnetdebug_printf _s;		\
			    }					\
			_NOTE(CONSTCOND) } while (0)

#endif
244 
245 /* _init(9E): initialize the loadable module */
246 int
247 _init(void)
248 {
249 	int status;
250 
251 	DBG1((NULL, "_init: enter\n"));
252 
253 	mac_init_ops(&vnetops, "vnet");
254 	status = mod_install(&modlinkage);
255 	if (status != 0) {
256 		mac_fini_ops(&vnetops);
257 	}
258 
259 	DBG1((NULL, "_init: exit\n"));
260 	return (status);
261 }
262 
263 /* _fini(9E): prepare the module for unloading. */
264 int
265 _fini(void)
266 {
267 	int status;
268 
269 	DBG1((NULL, "_fini: enter\n"));
270 
271 	status = mod_remove(&modlinkage);
272 	if (status != 0)
273 		return (status);
274 	mac_fini_ops(&vnetops);
275 
276 	DBG1((NULL, "_fini: exit\n"));
277 	return (status);
278 }
279 
280 /* _info(9E): return information about the loadable module */
281 int
282 _info(struct modinfo *modinfop)
283 {
284 	return (mod_info(&modlinkage, modinfop));
285 }
286 
287 /*
288  * attach(9E): attach a device to the system.
289  * called once for each instance of the device on the system.
290  */
291 static int
292 vnetattach(dev_info_t *dip, ddi_attach_cmd_t cmd)
293 {
294 	vnet_t		*vnetp;
295 	vp_tl_t		*vp_tlp;
296 	int		instance;
297 	int		status;
298 	enum		{ AST_init = 0x0, AST_vnet_alloc = 0x1,
299 			    AST_mac_alloc = 0x2, AST_read_macaddr = 0x4,
300 			    AST_vgen_init = 0x8, AST_vptl_alloc = 0x10,
301 			    AST_fdbh_alloc = 0x20 }
302 			attach_state;
303 	mac_register_t	*vgenmacp = NULL;
304 	uint32_t	nfdbh = 0;
305 
306 	attach_state = AST_init;
307 
308 	switch (cmd) {
309 	case DDI_ATTACH:
310 		break;
311 	case DDI_RESUME:
312 	case DDI_PM_RESUME:
313 	default:
314 		goto vnet_attach_fail;
315 	}
316 
317 	instance = ddi_get_instance(dip);
318 	DBG1((NULL, "vnetattach: instance(%d) enter\n", instance));
319 
320 	/* allocate vnet_t and mac_t structures */
321 	vnetp = kmem_zalloc(sizeof (vnet_t), KM_SLEEP);
322 	attach_state |= AST_vnet_alloc;
323 
324 	/* setup links to vnet_t from both devinfo and mac_t */
325 	ddi_set_driver_private(dip, (caddr_t)vnetp);
326 	vnetp->dip = dip;
327 	vnetp->instance = instance;
328 
329 	/* read the mac address */
330 	status = vnet_read_mac_address(vnetp);
331 	if (status != DDI_SUCCESS) {
332 		goto vnet_attach_fail;
333 	}
334 	attach_state |= AST_read_macaddr;
335 
336 	/*
337 	 * Initialize the generic vnet proxy transport. This is the first
338 	 * and default transport used by vnet. The generic transport
339 	 * is provided by using sun4v LDC (logical domain channel). On success,
340 	 * vgen_init() provides a pointer to mac_t of generic transport.
341 	 * Currently, this generic layer provides network connectivity to other
342 	 * vnets within ldoms and also to remote hosts oustide ldoms through
343 	 * the virtual switch (vsw) device on domain0. In the future, when
344 	 * physical adapters that are able to share their resources (such as
345 	 * dma channels) with guest domains become available, the vnet device
346 	 * will use hardware specific driver to communicate directly over the
347 	 * physical device to reach remote hosts without going through vswitch.
348 	 */
349 	status = vgen_init(vnetp, vnetp->dip, (uint8_t *)vnetp->curr_macaddr,
350 	    &vgenmacp);
351 	if (status != DDI_SUCCESS) {
352 		DERR((vnetp, "vgen_init() failed\n"));
353 		goto vnet_attach_fail;
354 	}
355 	attach_state |= AST_vgen_init;
356 
357 	vp_tlp = kmem_zalloc(sizeof (vp_tl_t), KM_SLEEP);
358 	vp_tlp->macp = vgenmacp;
359 	(void) snprintf(vp_tlp->name, MAXNAMELEN, "%s%u", "vgen", instance);
360 	(void) strcpy(vnetp->vgen_name, vp_tlp->name);
361 
362 	/* add generic transport to the list of vnet proxy transports */
363 	vnet_add_vptl(vnetp, vp_tlp);
364 	attach_state |= AST_vptl_alloc;
365 
366 	nfdbh = vnet_nfdb_hash;
367 	if ((nfdbh < VNET_NFDB_HASH) || (nfdbh > VNET_NFDB_HASH_MAX)) {
368 		vnetp->nfdb_hash = VNET_NFDB_HASH;
369 	}
370 	else
371 		vnetp->nfdb_hash = nfdbh;
372 
373 	/* allocate fdb hash table, with an extra slot for default route */
374 	vnetp->fdbhp = kmem_zalloc(sizeof (fdb_fanout_t) *
375 	    (vnetp->nfdb_hash + 1), KM_SLEEP);
376 	attach_state |= AST_fdbh_alloc;
377 
378 	/* register with MAC layer */
379 	status = vnet_mac_register(vnetp);
380 	if (status != DDI_SUCCESS) {
381 		goto vnet_attach_fail;
382 	}
383 
384 	/* add to the list of vnet devices */
385 	WRITE_ENTER(&vnet_rw);
386 	vnetp->nextp = vnet_headp;
387 	vnet_headp = vnetp;
388 	RW_EXIT(&vnet_rw);
389 
390 	DBG1((NULL, "vnetattach: instance(%d) exit\n", instance));
391 	return (DDI_SUCCESS);
392 
393 vnet_attach_fail:
394 	if (attach_state & AST_fdbh_alloc) {
395 		kmem_free(vnetp->fdbhp,
396 		    sizeof (fdb_fanout_t) * (vnetp->nfdb_hash + 1));
397 	}
398 	if (attach_state & AST_vptl_alloc) {
399 		WRITE_ENTER(&vnetp->trwlock);
400 		vnet_del_vptl(vnetp, vp_tlp);
401 		RW_EXIT(&vnetp->trwlock);
402 	}
403 	if (attach_state & AST_vgen_init) {
404 		(void) vgen_uninit(vgenmacp->m_driver);
405 	}
406 	if (attach_state & AST_vnet_alloc) {
407 		KMEM_FREE(vnetp);
408 	}
409 	return (DDI_FAILURE);
410 }
411 
412 /*
413  * detach(9E): detach a device from the system.
414  */
415 static int
416 vnetdetach(dev_info_t *dip, ddi_detach_cmd_t cmd)
417 {
418 	vnet_t		*vnetp;
419 	vnet_t		**vnetpp;
420 	vp_tl_t		*vp_tlp;
421 	int		instance;
422 	int		rv;
423 
424 	instance = ddi_get_instance(dip);
425 	DBG1((NULL, "vnetdetach: instance(%d) enter\n", instance));
426 
427 	vnetp = ddi_get_driver_private(dip);
428 	if (vnetp == NULL) {
429 		goto vnet_detach_fail;
430 	}
431 
432 	switch (cmd) {
433 	case DDI_DETACH:
434 		break;
435 	case DDI_SUSPEND:
436 	case DDI_PM_SUSPEND:
437 	default:
438 		goto vnet_detach_fail;
439 	}
440 
441 	/* uninit and free vnet proxy transports */
442 	WRITE_ENTER(&vnetp->trwlock);
443 	while ((vp_tlp = vnetp->tlp) != NULL) {
444 		if (strcmp(vnetp->vgen_name, vp_tlp->name) == 0) {
445 			/* uninitialize generic transport */
446 			rv = vgen_uninit(vp_tlp->macp->m_driver);
447 			if (rv != DDI_SUCCESS) {
448 				RW_EXIT(&vnetp->trwlock);
449 				goto vnet_detach_fail;
450 			}
451 		}
452 		vnet_del_vptl(vnetp, vp_tlp);
453 	}
454 	RW_EXIT(&vnetp->trwlock);
455 
456 	/*
457 	 * Unregister from the MAC subsystem.  This can fail, in
458 	 * particular if there are DLPI style-2 streams still open -
459 	 * in which case we just return failure.
460 	 */
461 	if (mac_unregister(vnetp->mh) != 0)
462 		goto vnet_detach_fail;
463 
464 	/* unlink from instance(vnet_t) list */
465 	WRITE_ENTER(&vnet_rw);
466 	for (vnetpp = &vnet_headp; *vnetpp; vnetpp = &(*vnetpp)->nextp) {
467 		if (*vnetpp == vnetp) {
468 			*vnetpp = vnetp->nextp;
469 			break;
470 		}
471 	}
472 	RW_EXIT(&vnet_rw);
473 
474 	kmem_free(vnetp->fdbhp,
475 	    sizeof (fdb_fanout_t) * (vnetp->nfdb_hash + 1));
476 
477 	KMEM_FREE(vnetp);
478 
479 	return (DDI_SUCCESS);
480 
481 vnet_detach_fail:
482 	return (DDI_FAILURE);
483 }
484 
485 /* enable the device for transmit/receive */
486 static int
487 vnet_m_start(void *arg)
488 {
489 	vnet_t		*vnetp = arg;
490 	vp_tl_t		*vp_tlp;
491 	mac_register_t	*vp_macp;
492 	mac_callbacks_t	*cbp;
493 
494 	DBG1((vnetp, "vnet_m_start: enter\n"));
495 
496 	/*
497 	 * NOTE:
498 	 * Currently, we only have generic transport. m_start() invokes
499 	 * vgen_start() which enables ports/channels in vgen and
500 	 * initiates handshake with peer vnets and vsw. In the future when we
501 	 * have support for hardware specific transports, this information
502 	 * needs to be propagted back to vnet from vgen and we need to revisit
503 	 * this code (see comments in vnet_attach()).
504 	 *
505 	 */
506 	WRITE_ENTER(&vnetp->trwlock);
507 	for (vp_tlp = vnetp->tlp; vp_tlp != NULL; vp_tlp = vp_tlp->nextp) {
508 		vp_macp = vp_tlp->macp;
509 		cbp = vp_macp->m_callbacks;
510 		cbp->mc_start(vp_macp->m_driver);
511 	}
512 	RW_EXIT(&vnetp->trwlock);
513 
514 	DBG1((vnetp, "vnet_m_start: exit\n"));
515 	return (VNET_SUCCESS);
516 
517 }
518 
519 /* stop transmit/receive for the device */
520 static void
521 vnet_m_stop(void *arg)
522 {
523 	vnet_t		*vnetp = arg;
524 	vp_tl_t		*vp_tlp;
525 	mac_register_t	*vp_macp;
526 	mac_callbacks_t	*cbp;
527 
528 	DBG1((vnetp, "vnet_m_stop: enter\n"));
529 
530 	WRITE_ENTER(&vnetp->trwlock);
531 	for (vp_tlp = vnetp->tlp; vp_tlp != NULL; vp_tlp = vp_tlp->nextp) {
532 		vp_macp = vp_tlp->macp;
533 		cbp = vp_macp->m_callbacks;
534 		cbp->mc_stop(vp_macp->m_driver);
535 	}
536 	RW_EXIT(&vnetp->trwlock);
537 
538 	DBG1((vnetp, "vnet_m_stop: exit\n"));
539 }
540 
541 /* set the unicast mac address of the device */
542 static int
543 vnet_m_unicst(void *arg, const uint8_t *macaddr)
544 {
545 	_NOTE(ARGUNUSED(macaddr))
546 
547 	vnet_t *vnetp = arg;
548 
549 	DBG1((vnetp, "vnet_m_unicst: enter\n"));
550 	/*
551 	 * NOTE: setting mac address dynamically is not supported.
552 	 */
553 	DBG1((vnetp, "vnet_m_unicst: exit\n"));
554 
555 	return (VNET_FAILURE);
556 }
557 
558 /* enable/disable a multicast address */
559 static int
560 vnet_m_multicst(void *arg, boolean_t add, const uint8_t *mca)
561 {
562 	_NOTE(ARGUNUSED(add, mca))
563 
564 	vnet_t *vnetp = arg;
565 	vp_tl_t		*vp_tlp;
566 	mac_register_t	*vp_macp;
567 	mac_callbacks_t	*cbp;
568 	int rv = VNET_SUCCESS;
569 
570 	DBG1((vnetp, "vnet_m_multicst: enter\n"));
571 	READ_ENTER(&vnetp->trwlock);
572 	for (vp_tlp = vnetp->tlp; vp_tlp != NULL; vp_tlp = vp_tlp->nextp) {
573 		if (strcmp(vnetp->vgen_name, vp_tlp->name) == 0) {
574 			vp_macp = vp_tlp->macp;
575 			cbp = vp_macp->m_callbacks;
576 			rv = cbp->mc_multicst(vp_macp->m_driver, add, mca);
577 			break;
578 		}
579 	}
580 	RW_EXIT(&vnetp->trwlock);
581 	DBG1((vnetp, "vnet_m_multicst: exit\n"));
582 	return (rv);
583 }
584 
585 /* set or clear promiscuous mode on the device */
586 static int
587 vnet_m_promisc(void *arg, boolean_t on)
588 {
589 	_NOTE(ARGUNUSED(on))
590 
591 	vnet_t *vnetp = arg;
592 	DBG1((vnetp, "vnet_m_promisc: enter\n"));
593 	/*
594 	 * NOTE: setting promiscuous mode is not supported, just return success.
595 	 */
596 	DBG1((vnetp, "vnet_m_promisc: exit\n"));
597 	return (VNET_SUCCESS);
598 }
599 
600 /*
601  * Transmit a chain of packets. This function provides switching functionality
602  * based on the destination mac address to reach other guests (within ldoms) or
603  * external hosts.
604  */
605 mblk_t *
606 vnet_m_tx(void *arg, mblk_t *mp)
607 {
608 	vnet_t *vnetp;
609 	mblk_t *next;
610 	uint32_t fdbhash;
611 	fdb_t *fdbp;
612 	fdb_fanout_t *fdbhp;
613 	struct ether_header *ehp;
614 	uint8_t *macaddr;
615 	mblk_t *resid_mp;
616 
617 	vnetp = (vnet_t *)arg;
618 	DBG1((vnetp, "vnet_m_tx: enter\n"));
619 	ASSERT(mp != NULL);
620 
621 	while (mp != NULL) {
622 		next = mp->b_next;
623 		mp->b_next = NULL;
624 
625 		/* get the destination mac address in the eth header */
626 		ehp = (struct ether_header *)mp->b_rptr;
627 		macaddr = (uint8_t *)&ehp->ether_dhost;
628 
629 		/* Calculate hash value and fdb fanout */
630 		fdbhash = MACHASH(macaddr, vnetp->nfdb_hash);
631 		fdbhp = &(vnetp->fdbhp[fdbhash]);
632 
633 		READ_ENTER(&fdbhp->rwlock);
634 		fdbp = vnet_lookup_fdb(fdbhp, macaddr);
635 		if (fdbp) {
636 			/*
637 			 * If the destination is in FDB, the destination is
638 			 * a vnet device within ldoms and directly reachable,
639 			 * invoke the tx function in the fdb entry.
640 			 */
641 			resid_mp = fdbp->m_tx(fdbp->txarg, mp);
642 			if (resid_mp != NULL) {
643 				/* m_tx failed */
644 				mp->b_next = next;
645 				RW_EXIT(&fdbhp->rwlock);
646 				break;
647 			}
648 			RW_EXIT(&fdbhp->rwlock);
649 		} else {
650 			/* destination is not in FDB */
651 			RW_EXIT(&fdbhp->rwlock);
652 			/*
653 			 * If the destination is broadcast/multicast
654 			 * or an unknown unicast address, forward the
655 			 * packet to vsw, using the last slot in fdb which is
656 			 * reserved for default route.
657 			 */
658 			fdbhp = &(vnetp->fdbhp[vnetp->nfdb_hash]);
659 			READ_ENTER(&fdbhp->rwlock);
660 			fdbp = fdbhp->headp;
661 			if (fdbp) {
662 				resid_mp = fdbp->m_tx(fdbp->txarg, mp);
663 				if (resid_mp != NULL) {
664 					/* m_tx failed */
665 					mp->b_next = next;
666 					RW_EXIT(&fdbhp->rwlock);
667 					break;
668 				}
669 			} else {
670 				/* drop the packet */
671 				freemsg(mp);
672 			}
673 			RW_EXIT(&fdbhp->rwlock);
674 		}
675 
676 		mp = next;
677 	}
678 
679 	DBG1((vnetp, "vnet_m_tx: exit\n"));
680 	return (mp);
681 }
682 
683 /* get statistics from the device */
684 int
685 vnet_m_stat(void *arg, uint_t stat, uint64_t *val)
686 {
687 	vnet_t *vnetp = arg;
688 	vp_tl_t	*vp_tlp;
689 	mac_register_t	*vp_macp;
690 	mac_callbacks_t	*cbp;
691 	uint64_t val_total = 0;
692 
693 	DBG1((vnetp, "vnet_m_stat: enter\n"));
694 
695 	/*
696 	 * get the specified statistic from each transport and return the
697 	 * aggregate val.  This obviously only works for counters.
698 	 */
699 	if ((IS_MAC_STAT(stat) && !MAC_STAT_ISACOUNTER(stat)) ||
700 	    (IS_MACTYPE_STAT(stat) && !ETHER_STAT_ISACOUNTER(stat))) {
701 		return (ENOTSUP);
702 	}
703 	READ_ENTER(&vnetp->trwlock);
704 	for (vp_tlp = vnetp->tlp; vp_tlp != NULL; vp_tlp = vp_tlp->nextp) {
705 		vp_macp = vp_tlp->macp;
706 		cbp = vp_macp->m_callbacks;
707 		if (cbp->mc_getstat(vp_macp->m_driver, stat, val) == 0)
708 			val_total += *val;
709 	}
710 	RW_EXIT(&vnetp->trwlock);
711 
712 	*val = val_total;
713 
714 	DBG1((vnetp, "vnet_m_stat: exit\n"));
715 	return (0);
716 }
717 
718 /* wrapper function for mac_register() */
719 static int
720 vnet_mac_register(vnet_t *vnetp)
721 {
722 	mac_register_t	*macp;
723 	int		err;
724 
725 	if ((macp = mac_alloc(MAC_VERSION)) == NULL)
726 		return (DDI_FAILURE);
727 	macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
728 	macp->m_driver = vnetp;
729 	macp->m_dip = vnetp->dip;
730 	macp->m_src_addr = vnetp->curr_macaddr;
731 	macp->m_callbacks = &vnet_m_callbacks;
732 	macp->m_min_sdu = 0;
733 	macp->m_max_sdu = ETHERMTU;
734 
735 	/*
736 	 * Finally, we're ready to register ourselves with the MAC layer
737 	 * interface; if this succeeds, we're all ready to start()
738 	 */
739 	err = mac_register(macp, &vnetp->mh);
740 	mac_free(macp);
741 	return (err == 0 ? DDI_SUCCESS : DDI_FAILURE);
742 }
743 
744 /* add vp_tl to the list */
745 static void
746 vnet_add_vptl(vnet_t *vnetp, vp_tl_t *vp_tlp)
747 {
748 	vp_tl_t *ttlp;
749 
750 	WRITE_ENTER(&vnetp->trwlock);
751 	if (vnetp->tlp == NULL) {
752 		vnetp->tlp = vp_tlp;
753 	} else {
754 		ttlp = vnetp->tlp;
755 		while (ttlp->nextp)
756 			ttlp = ttlp->nextp;
757 		ttlp->nextp = vp_tlp;
758 	}
759 	RW_EXIT(&vnetp->trwlock);
760 }
761 
762 /* remove vp_tl from the list */
763 static void
764 vnet_del_vptl(vnet_t *vnetp, vp_tl_t *vp_tlp)
765 {
766 	vp_tl_t *ttlp, **pretlp;
767 	boolean_t found = B_FALSE;
768 
769 	pretlp = &vnetp->tlp;
770 	ttlp = *pretlp;
771 	while (ttlp) {
772 		if (ttlp == vp_tlp) {
773 			found = B_TRUE;
774 			(*pretlp) = ttlp->nextp;
775 			ttlp->nextp = NULL;
776 			break;
777 		}
778 		pretlp = &(ttlp->nextp);
779 		ttlp = *pretlp;
780 	}
781 
782 	if (found) {
783 		KMEM_FREE(vp_tlp);
784 	}
785 }
786 
787 /* get vp_tl corresponding to the given name */
788 static vp_tl_t *
789 vnet_get_vptl(vnet_t *vnetp, const char *name)
790 {
791 	vp_tl_t *tlp;
792 
793 	tlp = vnetp->tlp;
794 	while (tlp) {
795 		if (strcmp(tlp->name, name) == 0) {
796 			return (tlp);
797 		}
798 		tlp = tlp->nextp;
799 	}
800 	DWARN((vnetp,
801 	    "vnet_get_vptl: can't find vp_tl with name (%s)\n", name));
802 	return (NULL);
803 }
804 
805 /* read the mac address of the device */
806 static int
807 vnet_read_mac_address(vnet_t *vnetp)
808 {
809 	uchar_t 	*macaddr;
810 	uint32_t 	size;
811 	int 		rv;
812 
813 	rv = ddi_prop_lookup_byte_array(DDI_DEV_T_ANY, vnetp->dip,
814 		DDI_PROP_DONTPASS, macaddr_propname, &macaddr, &size);
815 	if ((rv != DDI_PROP_SUCCESS) || (size != ETHERADDRL)) {
816 		DWARN((vnetp,
817 		"vnet_read_mac_address: prop_lookup failed (%s) err (%d)\n",
818 		macaddr_propname, rv));
819 		return (DDI_FAILURE);
820 	}
821 	bcopy(macaddr, (caddr_t)vnetp->vendor_addr, ETHERADDRL);
822 	bcopy(macaddr, (caddr_t)vnetp->curr_macaddr, ETHERADDRL);
823 	ddi_prop_free(macaddr);
824 
825 	return (DDI_SUCCESS);
826 }
827 
828 
829 /*
830  * Functions below are called only by generic transport to add/remove/modify
831  * entries in forwarding database. See comments in vgen_port_init(vnet_gen.c).
832  */
833 
834 /* add an entry into the forwarding database */
835 void
836 vnet_add_fdb(void *arg, uint8_t *macaddr, mac_tx_t m_tx, void *txarg)
837 {
838 	vnet_t *vnetp = (vnet_t *)arg;
839 	uint32_t fdbhash;
840 	fdb_t *fdbp;
841 	fdb_fanout_t *fdbhp;
842 
843 	/* Calculate hash value and fdb fanout */
844 	fdbhash = MACHASH(macaddr, vnetp->nfdb_hash);
845 	fdbhp = &(vnetp->fdbhp[fdbhash]);
846 
847 	WRITE_ENTER(&fdbhp->rwlock);
848 
849 	fdbp = kmem_zalloc(sizeof (fdb_t), KM_NOSLEEP);
850 	if (fdbp == NULL) {
851 		RW_EXIT(&fdbhp->rwlock);
852 		return;
853 	}
854 	bcopy(macaddr, (caddr_t)fdbp->macaddr, ETHERADDRL);
855 	fdbp->m_tx = m_tx;
856 	fdbp->txarg = txarg;
857 	fdbp->nextp = fdbhp->headp;
858 	fdbhp->headp = fdbp;
859 
860 	RW_EXIT(&fdbhp->rwlock);
861 }
862 
863 /* delete an entry from the forwarding database */
864 void
865 vnet_del_fdb(void *arg, uint8_t *macaddr)
866 {
867 	vnet_t *vnetp = (vnet_t *)arg;
868 	uint32_t fdbhash;
869 	fdb_t *fdbp;
870 	fdb_t **pfdbp;
871 	fdb_fanout_t *fdbhp;
872 
873 	/* Calculate hash value and fdb fanout */
874 	fdbhash = MACHASH(macaddr, vnetp->nfdb_hash);
875 	fdbhp = &(vnetp->fdbhp[fdbhash]);
876 
877 	WRITE_ENTER(&fdbhp->rwlock);
878 
879 	for (pfdbp = &fdbhp->headp; (fdbp  = *pfdbp) != NULL;
880 	    pfdbp = &fdbp->nextp) {
881 		if (bcmp(fdbp->macaddr, macaddr, ETHERADDRL) == 0) {
882 			/* Unlink it from the list */
883 			*pfdbp = fdbp->nextp;
884 			KMEM_FREE(fdbp);
885 			break;
886 		}
887 	}
888 
889 	RW_EXIT(&fdbhp->rwlock);
890 }
891 
892 /* modify an existing entry in the forwarding database */
893 void
894 vnet_modify_fdb(void *arg, uint8_t *macaddr, mac_tx_t m_tx, void *txarg,
895 	boolean_t upgrade)
896 {
897 	vnet_t *vnetp = (vnet_t *)arg;
898 	uint32_t fdbhash;
899 	fdb_t *fdbp;
900 	fdb_fanout_t *fdbhp;
901 
902 	/* Calculate hash value and fdb fanout */
903 	fdbhash = MACHASH(macaddr, vnetp->nfdb_hash);
904 	fdbhp = &(vnetp->fdbhp[fdbhash]);
905 
906 	if (upgrade == B_TRUE) {
907 		/*
908 		 * Caller already holds the lock as a reader. This can
909 		 * occur if this function is invoked in the context
910 		 * of transmit routine - vnet_m_tx(), where the lock
911 		 * is held as a reader before calling the transmit
912 		 * function of an fdb entry (fdbp->m_tx).
913 		 * See comments in vgen_ldcsend() in vnet_gen.c
914 		 */
915 		if (!rw_tryupgrade(&fdbhp->rwlock)) {
916 			RW_EXIT(&fdbhp->rwlock);
917 			WRITE_ENTER(&fdbhp->rwlock);
918 		}
919 	} else {
920 		/* Caller does not hold the lock */
921 		WRITE_ENTER(&fdbhp->rwlock);
922 	}
923 
924 	for (fdbp = fdbhp->headp; fdbp != NULL; fdbp = fdbp->nextp) {
925 		if (bcmp(fdbp->macaddr, macaddr, ETHERADDRL) == 0) {
926 			/* change the entry to have new tx params */
927 			fdbp->m_tx = m_tx;
928 			fdbp->txarg = txarg;
929 			break;
930 		}
931 	}
932 
933 	if (upgrade == B_TRUE) {
934 		/* restore the caller as a reader */
935 		rw_downgrade(&fdbhp->rwlock);
936 	} else {
937 		RW_EXIT(&fdbhp->rwlock);
938 	}
939 }
940 
941 /* look up an fdb entry based on the mac address, caller holds lock */
942 static fdb_t *
943 vnet_lookup_fdb(fdb_fanout_t *fdbhp, uint8_t *macaddr)
944 {
945 	fdb_t *fdbp = NULL;
946 
947 	for (fdbp = fdbhp->headp; fdbp != NULL; fdbp = fdbp->nextp) {
948 		if (bcmp(fdbp->macaddr, macaddr, ETHERADDRL) == 0) {
949 			break;
950 		}
951 	}
952 
953 	return (fdbp);
954 }
955 
956 /* add default route entry into the forwarding database */
957 void
958 vnet_add_def_rte(void *arg, mac_tx_t m_tx, void *txarg)
959 {
960 	vnet_t *vnetp = (vnet_t *)arg;
961 	fdb_t *fdbp;
962 	fdb_fanout_t *fdbhp;
963 
964 	/*
965 	 * The last hash list is reserved for default route entry,
966 	 * and for now, we have only one entry in this list.
967 	 */
968 	fdbhp = &(vnetp->fdbhp[vnetp->nfdb_hash]);
969 
970 	WRITE_ENTER(&fdbhp->rwlock);
971 
972 	if (fdbhp->headp) {
973 		DWARN((vnetp,
974 		    "vnet_add_def_rte: default rte already exists\n"));
975 		RW_EXIT(&fdbhp->rwlock);
976 		return;
977 	}
978 	fdbp = kmem_zalloc(sizeof (fdb_t), KM_NOSLEEP);
979 	if (fdbp == NULL) {
980 		RW_EXIT(&fdbhp->rwlock);
981 		return;
982 	}
983 	bzero(fdbp->macaddr, ETHERADDRL);
984 	fdbp->m_tx = m_tx;
985 	fdbp->txarg = txarg;
986 	fdbp->nextp = NULL;
987 	fdbhp->headp = fdbp;
988 
989 	RW_EXIT(&fdbhp->rwlock);
990 }
991 
992 /* delete default route entry from the forwarding database */
993 void
994 vnet_del_def_rte(void *arg)
995 {
996 	vnet_t *vnetp = (vnet_t *)arg;
997 	fdb_t *fdbp;
998 	fdb_fanout_t *fdbhp;
999 
1000 	/*
1001 	 * The last hash list is reserved for default route entry,
1002 	 * and for now, we have only one entry in this list.
1003 	 */
1004 	fdbhp = &(vnetp->fdbhp[vnetp->nfdb_hash]);
1005 
1006 	WRITE_ENTER(&fdbhp->rwlock);
1007 
1008 	if (fdbhp->headp == NULL) {
1009 		RW_EXIT(&fdbhp->rwlock);
1010 		return;
1011 	}
1012 	fdbp = fdbhp->headp;
1013 	KMEM_FREE(fdbp);
1014 	fdbhp->headp = NULL;
1015 
1016 	RW_EXIT(&fdbhp->rwlock);
1017 }
1018 
1019 void
1020 vnet_rx(void *arg, mac_resource_handle_t mrh, mblk_t *mp)
1021 {
1022 	vnet_t *vnetp = arg;
1023 	mac_rx(vnetp->mh, mrh, mp);
1024 }
1025 
1026 void
1027 vnet_tx_update(void *arg)
1028 {
1029 	vnet_t *vnetp = arg;
1030 	mac_tx_update(vnetp->mh);
1031 }
1032