xref: /titanic_52/usr/src/uts/sun4v/io/vnet.c (revision 6ee4b8d7ef9a262e3f564e81b2b2f4d8e61ffe15)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <sys/types.h>
30 #include <sys/errno.h>
31 #include <sys/param.h>
32 #include <sys/stream.h>
33 #include <sys/kmem.h>
34 #include <sys/conf.h>
35 #include <sys/devops.h>
36 #include <sys/ksynch.h>
37 #include <sys/stat.h>
38 #include <sys/modctl.h>
39 #include <sys/debug.h>
40 #include <sys/ethernet.h>
41 #include <sys/dlpi.h>
42 #include <net/if.h>
43 #include <sys/mac.h>
44 #include <sys/mac_ether.h>
45 #include <sys/ddi.h>
46 #include <sys/sunddi.h>
47 #include <sys/strsun.h>
48 #include <sys/note.h>
49 #include <sys/vnet.h>
50 
51 /*
52  * Function prototypes.
53  */
54 
55 /* DDI entrypoints */
56 static int vnetdevinfo(dev_info_t *, ddi_info_cmd_t, void *, void **);
57 static int vnetattach(dev_info_t *, ddi_attach_cmd_t);
58 static int vnetdetach(dev_info_t *, ddi_detach_cmd_t);
59 
60 /* MAC entrypoints  */
61 static int vnet_m_stat(void *, uint_t, uint64_t *);
62 static int vnet_m_start(void *);
63 static void vnet_m_stop(void *);
64 static int vnet_m_promisc(void *, boolean_t);
65 static int vnet_m_multicst(void *, boolean_t, const uint8_t *);
66 static int vnet_m_unicst(void *, const uint8_t *);
67 mblk_t *vnet_m_tx(void *, mblk_t *);
68 
69 /* vnet internal functions */
70 static int vnet_mac_register(vnet_t *);
71 static int vnet_read_mac_address(vnet_t *vnetp);
72 static void vnet_add_vptl(vnet_t *vnetp, vp_tl_t *vp_tlp);
73 static void vnet_del_vptl(vnet_t *vnetp, vp_tl_t *vp_tlp);
74 static vp_tl_t *vnet_get_vptl(vnet_t *vnetp, const char *devname);
75 static fdb_t *vnet_lookup_fdb(fdb_fanout_t *fdbhp, uint8_t *macaddr);
76 
77 /* exported functions */
78 void vnet_add_fdb(void *arg, uint8_t *macaddr, mac_tx_t m_tx, void *txarg);
79 void vnet_del_fdb(void *arg, uint8_t *macaddr);
80 void vnet_modify_fdb(void *arg, uint8_t *macaddr, mac_tx_t m_tx, void *txarg);
81 void vnet_add_def_rte(void *arg, mac_tx_t m_tx, void *txarg);
82 void vnet_del_def_rte(void *arg);
83 void vnet_rx(void *arg, mac_resource_handle_t mrh, mblk_t *mp);
84 void vnet_tx_update(void *arg);
85 
86 /* externs */
87 extern int vgen_init(void *vnetp, dev_info_t *vnetdip, const uint8_t *macaddr,
88 	mac_register_t **vgenmacp);
89 extern void vgen_uninit(void *arg);
90 
/*
 * MAC callback vector handed to the MAC layer by vnet_mac_register().
 * The initializer is positional; the comments name the vnet routine that
 * fills each slot.
 */
static mac_callbacks_t vnet_m_callbacks = {
	0,			/* presumably the optional-callback flags - confirm */
	vnet_m_stat,		/* get statistics */
	vnet_m_start,		/* enable transmit/receive */
	vnet_m_stop,		/* stop transmit/receive */
	vnet_m_promisc,		/* set/clear promiscuous mode */
	vnet_m_multicst,	/* enable/disable a multicast address */
	vnet_m_unicst,		/* set the unicast mac address */
	vnet_m_tx,		/* transmit a chain of packets */
	NULL,			/* remaining entry points are not provided */
	NULL,
	NULL
};
104 
/*
 * Linked list of "vnet_t" structures - one per instance.
 * vnetattach() pushes a new instance at the head and vnetdetach() unlinks
 * it; both do so while holding vnet_rw as writer.
 */
static vnet_t	*vnet_headp = NULL;
static krwlock_t vnet_rw;
110 
/*
 * Tunables.
 * Only vnet_nfdb_hash is consumed in this file: vnetattach() uses it as the
 * fdb hash table size, falling back to VNET_NFDB_HASH when the value lies
 * outside [VNET_NFDB_HASH, VNET_NFDB_HASH_MAX].  The other tunables are
 * presumably read by the generic transport - confirm in vnet_gen.c.
 */
uint32_t vnet_ntxds = VNET_NTXDS;	/* power of 2 transmit descriptors */
uint32_t vnet_reclaim_lowat = VNET_RECLAIM_LOWAT;  /* tx recl low watermark */
uint32_t vnet_reclaim_hiwat = VNET_RECLAIM_HIWAT;  /* tx recl high watermark */
uint32_t vnet_ldcwd_interval = VNET_LDCWD_INTERVAL; /* watchdog freq in msec */
uint32_t vnet_ldcwd_txtimeout = VNET_LDCWD_TXTIMEOUT;  /* tx timeout in msec */
uint32_t vnet_ldc_qlen = VNET_LDC_QLEN;		/* ldc qlen */
uint32_t vnet_nfdb_hash = VNET_NFDB_HASH;	/* size of fdb hash table */

/*
 * Property names
 */
/* device property read by vnet_read_mac_address() */
static char macaddr_propname[] = "local-mac-address";
124 
/*
 * This is the string displayed by modinfo(1m).
 */
static char vnet_ident[] = "vnet driver v%I%";
extern struct mod_ops mod_driverops;

/*
 * Character/block entry points.  open and close are nulldev; every other
 * I/O entry point is nodev or nochpoll.
 */
static struct cb_ops cb_vnetops = {
	nulldev,		/* cb_open */
	nulldev,		/* cb_close */
	nodev,			/* cb_strategy */
	nodev,			/* cb_print */
	nodev,			/* cb_dump */
	nodev,			/* cb_read */
	nodev,			/* cb_write */
	nodev,			/* cb_ioctl */
	nodev,			/* cb_devmap */
	nodev,			/* cb_mmap */
	nodev,			/* cb_segmap */
	nochpoll,		/* cb_chpoll */
	ddi_prop_op,		/* cb_prop_op */
	NULL,			/* cb_stream */
	(int)(D_MP)		/* cb_flag */
};

/*
 * Device operations.  _init() passes this table to mac_init_ops() before
 * installing the module, and _init()/_fini() undo that with mac_fini_ops().
 */
static struct dev_ops vnetops = {
	DEVO_REV,		/* devo_rev */
	0,			/* devo_refcnt */
	NULL,			/* devo_getinfo */
	nulldev,		/* devo_identify */
	nulldev,		/* devo_probe */
	vnetattach,		/* devo_attach */
	vnetdetach,		/* devo_detach */
	nodev,			/* devo_reset */
	&cb_vnetops,		/* devo_cb_ops */
	(struct bus_ops *)NULL	/* devo_bus_ops */
};

static struct modldrv modldrv = {
	&mod_driverops,		/* Type of module.  This one is a driver */
	vnet_ident,		/* ID string */
	&vnetops		/* driver specific ops */
};

/* linkage handed to mod_install(), mod_remove() and mod_info() */
static struct modlinkage modlinkage = {
	MODREV_1, (void *)&modldrv, NULL
};
170 
171 
172 /*
173  * Print debug messages - set to 0xf to enable all msgs
174  */
175 int _vnet_dbglevel = 0x8;
176 
177 void
178 _vnetdebug_printf(void *arg, const char *fmt, ...)
179 {
180 	char    buf[512];
181 	va_list ap;
182 	vnet_t *vnetp = (vnet_t *)arg;
183 
184 	va_start(ap, fmt);
185 	(void) vsprintf(buf, fmt, ap);
186 	va_end(ap);
187 
188 	if (vnetp == NULL)
189 		cmn_err(CE_CONT, "%s\n", buf);
190 	else
191 		cmn_err(CE_CONT, "vnet%d: %s\n", vnetp->instance, buf);
192 }
193 
#ifdef DEBUG

/*
 * XXX: any changes to the definitions below need corresponding changes in
 * vnet_gen.c
 */

/*
 * debug levels:
 * DBG_LEVEL1:	Function entry/exit tracing
 * DBG_LEVEL2:	Info messages
 * DBG_LEVEL3:	Warning messages
 * DBG_LEVEL4:	Error messages
 */

enum	{ DBG_LEVEL1 = 0x01, DBG_LEVEL2 = 0x02, DBG_LEVEL3 = 0x04,
	    DBG_LEVEL4 = 0x08 };

/*
 * Each macro takes a fully parenthesized argument list, e.g.
 *	DBG1((vnetp, "fmt %d\n", x));
 * and prints it when the matching bit is set in _vnet_dbglevel.
 */
#define	DBG1(_s)	do {						\
			    if ((_vnet_dbglevel & DBG_LEVEL1) != 0) {	\
					_vnetdebug_printf _s;		\
			    }					\
			_NOTE(CONSTCOND) } while (0)

#define	DBG2(_s)	do {						\
			    if ((_vnet_dbglevel & DBG_LEVEL2) != 0) {	\
					_vnetdebug_printf _s;		\
			    }					\
			_NOTE(CONSTCOND) } while (0)

#define	DWARN(_s)	do {						\
			    if ((_vnet_dbglevel & DBG_LEVEL3) != 0) {	\
					_vnetdebug_printf _s;		\
			    }					\
			_NOTE(CONSTCOND) } while (0)

#define	DERR(_s)	do {						\
			    if ((_vnet_dbglevel & DBG_LEVEL4) != 0) {	\
					_vnetdebug_printf _s;		\
			    }					\
			_NOTE(CONSTCOND) } while (0)

#else

/*
 * Non-DEBUG builds: the call is still compiled but never executed.
 * The bare "if (0) call" form is wrapped in do { } while (0) so that each
 * macro expands to exactly one statement and cannot capture an "else" that
 * follows it at the call site.
 */
#define	DBG1(_s)	do {						\
			    if (0) {					\
					_vnetdebug_printf _s;		\
			    }					\
			_NOTE(CONSTCOND) } while (0)

#define	DBG2(_s)	do {						\
			    if (0) {					\
					_vnetdebug_printf _s;		\
			    }					\
			_NOTE(CONSTCOND) } while (0)

#define	DWARN(_s)	do {						\
			    if (0) {					\
					_vnetdebug_printf _s;		\
			    }					\
			_NOTE(CONSTCOND) } while (0)

#define	DERR(_s)	do {						\
			    if (0) {					\
					_vnetdebug_printf _s;		\
			    }					\
			_NOTE(CONSTCOND) } while (0)

#endif
244 
245 /* _init(9E): initialize the loadable module */
246 int
247 _init(void)
248 {
249 	int status;
250 
251 	DBG1((NULL, "_init: enter\n"));
252 
253 	mac_init_ops(&vnetops, "vnet");
254 	status = mod_install(&modlinkage);
255 	if (status != 0) {
256 		mac_fini_ops(&vnetops);
257 	}
258 
259 	DBG1((NULL, "_init: exit\n"));
260 	return (status);
261 }
262 
263 /* _fini(9E): prepare the module for unloading. */
264 int
265 _fini(void)
266 {
267 	int status;
268 
269 	DBG1((NULL, "_fini: enter\n"));
270 
271 	status = mod_remove(&modlinkage);
272 	if (status != 0)
273 		return (status);
274 	mac_fini_ops(&vnetops);
275 
276 	DBG1((NULL, "_fini: exit\n"));
277 	return (status);
278 }
279 
280 /* _info(9E): return information about the loadable module */
281 int
282 _info(struct modinfo *modinfop)
283 {
284 	return (mod_info(&modlinkage, modinfop));
285 }
286 
287 /*
288  * attach(9E): attach a device to the system.
289  * called once for each instance of the device on the system.
290  */
291 static int
292 vnetattach(dev_info_t *dip, ddi_attach_cmd_t cmd)
293 {
294 	vnet_t		*vnetp;
295 	vp_tl_t		*vp_tlp;
296 	int		instance;
297 	int		status;
298 	enum		{ AST_init = 0x0, AST_vnet_alloc = 0x1,
299 			    AST_read_macaddr = 0x2, AST_vgen_init = 0x4,
300 			    AST_vptl_alloc = 0x8, AST_fdbh_alloc = 0x10 }
301 			attach_state;
302 	mac_register_t	*vgenmacp = NULL;
303 	uint32_t	nfdbh = 0;
304 
305 	attach_state = AST_init;
306 
307 	switch (cmd) {
308 	case DDI_ATTACH:
309 		break;
310 	case DDI_RESUME:
311 	case DDI_PM_RESUME:
312 	default:
313 		goto vnet_attach_fail;
314 	}
315 
316 	instance = ddi_get_instance(dip);
317 	DBG1((NULL, "vnetattach: instance(%d) enter\n", instance));
318 
319 	/* allocate vnet_t and mac_t structures */
320 	vnetp = kmem_zalloc(sizeof (vnet_t), KM_SLEEP);
321 	attach_state |= AST_vnet_alloc;
322 
323 	/* setup links to vnet_t from both devinfo and mac_t */
324 	ddi_set_driver_private(dip, (caddr_t)vnetp);
325 	vnetp->dip = dip;
326 	vnetp->instance = instance;
327 
328 	/* read the mac address */
329 	status = vnet_read_mac_address(vnetp);
330 	if (status != DDI_SUCCESS) {
331 		goto vnet_attach_fail;
332 	}
333 	attach_state |= AST_read_macaddr;
334 
335 	/*
336 	 * Initialize the generic vnet proxy transport. This is the first
337 	 * and default transport used by vnet. The generic transport
338 	 * is provided by using sun4v LDC (logical domain channel). On success,
339 	 * vgen_init() provides a pointer to mac_t of generic transport.
340 	 * Currently, this generic layer provides network connectivity to other
341 	 * vnets within ldoms and also to remote hosts oustide ldoms through
342 	 * the virtual switch (vsw) device on domain0. In the future, when
343 	 * physical adapters that are able to share their resources (such as
344 	 * dma channels) with guest domains become available, the vnet device
345 	 * will use hardware specific driver to communicate directly over the
346 	 * physical device to reach remote hosts without going through vswitch.
347 	 */
348 	status = vgen_init(vnetp, vnetp->dip, (uint8_t *)vnetp->curr_macaddr,
349 	    &vgenmacp);
350 	if (status != DDI_SUCCESS) {
351 		DERR((vnetp, "vgen_init() failed\n"));
352 		goto vnet_attach_fail;
353 	}
354 	attach_state |= AST_vgen_init;
355 
356 	vp_tlp = kmem_zalloc(sizeof (vp_tl_t), KM_SLEEP);
357 	vp_tlp->macp = vgenmacp;
358 	(void) snprintf(vp_tlp->name, MAXNAMELEN, "%s%u", "vgen", instance);
359 	(void) strcpy(vnetp->vgen_name, vp_tlp->name);
360 
361 	/* add generic transport to the list of vnet proxy transports */
362 	vnet_add_vptl(vnetp, vp_tlp);
363 	attach_state |= AST_vptl_alloc;
364 
365 	nfdbh = vnet_nfdb_hash;
366 	if ((nfdbh < VNET_NFDB_HASH) || (nfdbh > VNET_NFDB_HASH_MAX)) {
367 		vnetp->nfdb_hash = VNET_NFDB_HASH;
368 	}
369 	else
370 		vnetp->nfdb_hash = nfdbh;
371 
372 	/* allocate fdb hash table, with an extra slot for default route */
373 	vnetp->fdbhp = kmem_zalloc(sizeof (fdb_fanout_t) *
374 	    (vnetp->nfdb_hash + 1), KM_SLEEP);
375 	attach_state |= AST_fdbh_alloc;
376 
377 	/* register with MAC layer */
378 	status = vnet_mac_register(vnetp);
379 	if (status != DDI_SUCCESS) {
380 		goto vnet_attach_fail;
381 	}
382 
383 	/* add to the list of vnet devices */
384 	WRITE_ENTER(&vnet_rw);
385 	vnetp->nextp = vnet_headp;
386 	vnet_headp = vnetp;
387 	RW_EXIT(&vnet_rw);
388 
389 	DBG1((NULL, "vnetattach: instance(%d) exit\n", instance));
390 	return (DDI_SUCCESS);
391 
392 vnet_attach_fail:
393 	if (attach_state & AST_fdbh_alloc) {
394 		kmem_free(vnetp->fdbhp,
395 		    sizeof (fdb_fanout_t) * (vnetp->nfdb_hash + 1));
396 	}
397 	if (attach_state & AST_vptl_alloc) {
398 		WRITE_ENTER(&vnetp->trwlock);
399 		vnet_del_vptl(vnetp, vp_tlp);
400 		RW_EXIT(&vnetp->trwlock);
401 	}
402 	if (attach_state & AST_vgen_init) {
403 		vgen_uninit(vgenmacp->m_driver);
404 	}
405 	if (attach_state & AST_vnet_alloc) {
406 		KMEM_FREE(vnetp);
407 	}
408 	return (DDI_FAILURE);
409 }
410 
411 /*
412  * detach(9E): detach a device from the system.
413  */
414 static int
415 vnetdetach(dev_info_t *dip, ddi_detach_cmd_t cmd)
416 {
417 	vnet_t		*vnetp;
418 	vnet_t		**vnetpp;
419 	vp_tl_t		*vp_tlp;
420 	int		instance;
421 
422 	instance = ddi_get_instance(dip);
423 	DBG1((NULL, "vnetdetach: instance(%d) enter\n", instance));
424 
425 	vnetp = ddi_get_driver_private(dip);
426 	if (vnetp == NULL) {
427 		goto vnet_detach_fail;
428 	}
429 
430 	switch (cmd) {
431 	case DDI_DETACH:
432 		break;
433 	case DDI_SUSPEND:
434 	case DDI_PM_SUSPEND:
435 	default:
436 		goto vnet_detach_fail;
437 	}
438 
439 	/*
440 	 * Unregister from the MAC subsystem.  This can fail, in
441 	 * particular if there are DLPI style-2 streams still open -
442 	 * in which case we just return failure.
443 	 */
444 	if (mac_unregister(vnetp->mh) != 0)
445 		goto vnet_detach_fail;
446 
447 	/* unlink from instance(vnet_t) list */
448 	WRITE_ENTER(&vnet_rw);
449 	for (vnetpp = &vnet_headp; *vnetpp; vnetpp = &(*vnetpp)->nextp) {
450 		if (*vnetpp == vnetp) {
451 			*vnetpp = vnetp->nextp;
452 			break;
453 		}
454 	}
455 	RW_EXIT(&vnet_rw);
456 
457 	/* uninit and free vnet proxy transports */
458 	WRITE_ENTER(&vnetp->trwlock);
459 	while ((vp_tlp = vnetp->tlp) != NULL) {
460 		if (strcmp(vnetp->vgen_name, vp_tlp->name) == 0) {
461 			/* uninitialize generic transport */
462 			vgen_uninit(vp_tlp->macp->m_driver);
463 		}
464 		vnet_del_vptl(vnetp, vp_tlp);
465 	}
466 	RW_EXIT(&vnetp->trwlock);
467 
468 	KMEM_FREE(vnetp);
469 
470 	return (DDI_SUCCESS);
471 
472 vnet_detach_fail:
473 	return (DDI_FAILURE);
474 }
475 
476 /* enable the device for transmit/receive */
477 static int
478 vnet_m_start(void *arg)
479 {
480 	vnet_t		*vnetp = arg;
481 	vp_tl_t		*vp_tlp;
482 	mac_register_t	*vp_macp;
483 	mac_callbacks_t	*cbp;
484 
485 	DBG1((vnetp, "vnet_m_start: enter\n"));
486 
487 	/*
488 	 * XXX
489 	 * Currently, we only have generic transport. m_start() invokes
490 	 * vgen_start() which enables ports/channels in vgen and
491 	 * initiates handshake with peer vnets and vsw. In the future when we
492 	 * have support for hardware specific transports, this information
493 	 * needs to be propagted back to vnet from vgen and we need to revisit
494 	 * this code (see comments in vnet_attach()).
495 	 *
496 	 */
497 	WRITE_ENTER(&vnetp->trwlock);
498 	for (vp_tlp = vnetp->tlp; vp_tlp != NULL; vp_tlp = vp_tlp->nextp) {
499 		vp_macp = vp_tlp->macp;
500 		cbp = vp_macp->m_callbacks;
501 		cbp->mc_start(vp_macp->m_driver);
502 	}
503 	RW_EXIT(&vnetp->trwlock);
504 
505 	DBG1((vnetp, "vnet_m_start: exit\n"));
506 	return (VNET_SUCCESS);
507 
508 }
509 
510 /* stop transmit/receive for the device */
511 static void
512 vnet_m_stop(void *arg)
513 {
514 	vnet_t		*vnetp = arg;
515 	vp_tl_t		*vp_tlp;
516 	mac_register_t	*vp_macp;
517 	mac_callbacks_t	*cbp;
518 
519 	DBG1((vnetp, "vnet_m_stop: enter\n"));
520 
521 	WRITE_ENTER(&vnetp->trwlock);
522 	for (vp_tlp = vnetp->tlp; vp_tlp != NULL; vp_tlp = vp_tlp->nextp) {
523 		vp_macp = vp_tlp->macp;
524 		cbp = vp_macp->m_callbacks;
525 		cbp->mc_stop(vp_macp->m_driver);
526 	}
527 	RW_EXIT(&vnetp->trwlock);
528 
529 	DBG1((vnetp, "vnet_m_stop: exit\n"));
530 }
531 
532 /* set the unicast mac address of the device */
533 static int
534 vnet_m_unicst(void *arg, const uint8_t *macaddr)
535 {
536 	_NOTE(ARGUNUSED(macaddr))
537 
538 	vnet_t *vnetp = arg;
539 
540 	DBG1((vnetp, "vnet_m_unicst: enter\n"));
541 	/*
542 	 * XXX: setting mac address dynamically is not supported.
543 	 */
544 	DBG1((vnetp, "vnet_m_unicst: exit\n"));
545 
546 	return (VNET_FAILURE);
547 }
548 
549 /* enable/disable a multicast address */
550 static int
551 vnet_m_multicst(void *arg, boolean_t add, const uint8_t *mca)
552 {
553 	_NOTE(ARGUNUSED(add, mca))
554 
555 	vnet_t *vnetp = arg;
556 	vp_tl_t		*vp_tlp;
557 	mac_register_t	*vp_macp;
558 	mac_callbacks_t	*cbp;
559 	int rv = VNET_SUCCESS;
560 
561 	DBG1((vnetp, "vnet_m_multicst: enter\n"));
562 	READ_ENTER(&vnetp->trwlock);
563 	for (vp_tlp = vnetp->tlp; vp_tlp != NULL; vp_tlp = vp_tlp->nextp) {
564 		if (strcmp(vnetp->vgen_name, vp_tlp->name) == 0) {
565 			vp_macp = vp_tlp->macp;
566 			cbp = vp_macp->m_callbacks;
567 			rv = cbp->mc_multicst(vp_macp->m_driver, add, mca);
568 			break;
569 		}
570 	}
571 	RW_EXIT(&vnetp->trwlock);
572 	DBG1((vnetp, "vnet_m_multicst: exit\n"));
573 	return (rv);
574 }
575 
576 /* set or clear promiscuous mode on the device */
577 static int
578 vnet_m_promisc(void *arg, boolean_t on)
579 {
580 	_NOTE(ARGUNUSED(on))
581 
582 	vnet_t *vnetp = arg;
583 	DBG1((vnetp, "vnet_m_promisc: enter\n"));
584 	/*
585 	 * XXX: setting promiscuous mode is not supported, just return success.
586 	 */
587 	DBG1((vnetp, "vnet_m_promisc: exit\n"));
588 	return (VNET_SUCCESS);
589 }
590 
591 /*
592  * Transmit a chain of packets. This function provides switching functionality
593  * based on the destination mac address to reach other guests (within ldoms) or
594  * external hosts.
595  */
596 mblk_t *
597 vnet_m_tx(void *arg, mblk_t *mp)
598 {
599 	vnet_t *vnetp;
600 	mblk_t *next;
601 	uint32_t fdbhash;
602 	fdb_t *fdbp;
603 	fdb_fanout_t *fdbhp;
604 	struct ether_header *ehp;
605 	uint8_t *macaddr;
606 	mblk_t *resid_mp;
607 
608 	vnetp = (vnet_t *)arg;
609 	DBG1((vnetp, "vnet_m_tx: enter\n"));
610 	ASSERT(mp != NULL);
611 
612 	while (mp != NULL) {
613 		next = mp->b_next;
614 		mp->b_next = NULL;
615 
616 		/* get the destination mac address in the eth header */
617 		ehp = (struct ether_header *)mp->b_rptr;
618 		macaddr = (uint8_t *)&ehp->ether_dhost;
619 
620 		/* Calculate hash value and fdb fanout */
621 		fdbhash = MACHASH(macaddr, vnetp->nfdb_hash);
622 		fdbhp = &(vnetp->fdbhp[fdbhash]);
623 
624 		READ_ENTER(&fdbhp->rwlock);
625 		fdbp = vnet_lookup_fdb(fdbhp, macaddr);
626 		if (fdbp) {
627 			/*
628 			 * If the destination is in FDB, the destination is
629 			 * a vnet device within ldoms and directly reachable,
630 			 * invoke the tx function in the fdb entry.
631 			 */
632 			resid_mp = fdbp->m_tx(fdbp->txarg, mp);
633 			if (resid_mp != NULL) {
634 				/* m_tx failed */
635 				mp->b_next = next;
636 				RW_EXIT(&fdbhp->rwlock);
637 				break;
638 			}
639 			RW_EXIT(&fdbhp->rwlock);
640 		} else {
641 			/* destination is not in FDB */
642 			RW_EXIT(&fdbhp->rwlock);
643 			/*
644 			 * If the destination is broadcast/multicast
645 			 * or an unknown unicast address, forward the
646 			 * packet to vsw, using the last slot in fdb which is
647 			 * reserved for default route.
648 			 */
649 			fdbhp = &(vnetp->fdbhp[vnetp->nfdb_hash]);
650 			READ_ENTER(&fdbhp->rwlock);
651 			fdbp = fdbhp->headp;
652 			if (fdbp) {
653 				resid_mp = fdbp->m_tx(fdbp->txarg, mp);
654 				if (resid_mp != NULL) {
655 					/* m_tx failed */
656 					mp->b_next = next;
657 					RW_EXIT(&fdbhp->rwlock);
658 					break;
659 				}
660 			} else {
661 				/* drop the packet */
662 				freemsg(mp);
663 			}
664 			RW_EXIT(&fdbhp->rwlock);
665 		}
666 
667 		mp = next;
668 	}
669 
670 	DBG1((vnetp, "vnet_m_tx: exit\n"));
671 	return (mp);
672 }
673 
674 /* get statistics from the device */
675 int
676 vnet_m_stat(void *arg, uint_t stat, uint64_t *val)
677 {
678 	vnet_t *vnetp = arg;
679 	vp_tl_t	*vp_tlp;
680 	mac_register_t	*vp_macp;
681 	mac_callbacks_t	*cbp;
682 	uint64_t val_total = 0;
683 
684 	DBG1((vnetp, "vnet_m_stat: enter\n"));
685 
686 	/*
687 	 * get the specified statistic from each transport and return the
688 	 * aggregate val.  This obviously only works for counters.
689 	 */
690 	if ((IS_MAC_STAT(stat) && !MAC_STAT_ISACOUNTER(stat)) ||
691 	    (IS_MACTYPE_STAT(stat) && !ETHER_STAT_ISACOUNTER(stat))) {
692 		return (ENOTSUP);
693 	}
694 	READ_ENTER(&vnetp->trwlock);
695 	for (vp_tlp = vnetp->tlp; vp_tlp != NULL; vp_tlp = vp_tlp->nextp) {
696 		vp_macp = vp_tlp->macp;
697 		cbp = vp_macp->m_callbacks;
698 		if (cbp->mc_getstat(vp_macp->m_driver, stat, val) == 0)
699 			val_total += *val;
700 	}
701 	RW_EXIT(&vnetp->trwlock);
702 
703 	*val = val_total;
704 
705 	DBG1((vnetp, "vnet_m_stat: exit\n"));
706 	return (0);
707 }
708 
709 /* wrapper function for mac_register() */
710 static int
711 vnet_mac_register(vnet_t *vnetp)
712 {
713 	mac_register_t	*macp;
714 	int		err;
715 
716 	if ((macp = mac_alloc(MAC_VERSION)) == NULL)
717 		return (DDI_FAILURE);
718 	macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
719 	macp->m_driver = vnetp;
720 	macp->m_dip = vnetp->dip;
721 	macp->m_src_addr = vnetp->curr_macaddr;
722 	macp->m_callbacks = &vnet_m_callbacks;
723 	macp->m_min_sdu = 0;
724 	macp->m_max_sdu = ETHERMTU;
725 
726 	/*
727 	 * Finally, we're ready to register ourselves with the MAC layer
728 	 * interface; if this succeeds, we're all ready to start()
729 	 */
730 	err = mac_register(macp, &vnetp->mh);
731 	mac_free(macp);
732 	return (err == 0 ? DDI_SUCCESS : DDI_FAILURE);
733 }
734 
735 /* add vp_tl to the list */
736 static void
737 vnet_add_vptl(vnet_t *vnetp, vp_tl_t *vp_tlp)
738 {
739 	vp_tl_t *ttlp;
740 
741 	WRITE_ENTER(&vnetp->trwlock);
742 	if (vnetp->tlp == NULL) {
743 		vnetp->tlp = vp_tlp;
744 	} else {
745 		ttlp = vnetp->tlp;
746 		while (ttlp->nextp)
747 			ttlp = ttlp->nextp;
748 		ttlp->nextp = vp_tlp;
749 	}
750 	RW_EXIT(&vnetp->trwlock);
751 }
752 
753 /* remove vp_tl from the list */
754 static void
755 vnet_del_vptl(vnet_t *vnetp, vp_tl_t *vp_tlp)
756 {
757 	vp_tl_t *ttlp, **pretlp;
758 	boolean_t found = B_FALSE;
759 
760 	pretlp = &vnetp->tlp;
761 	ttlp = *pretlp;
762 	while (ttlp) {
763 		if (ttlp == vp_tlp) {
764 			found = B_TRUE;
765 			(*pretlp) = ttlp->nextp;
766 			ttlp->nextp = NULL;
767 			break;
768 		}
769 		pretlp = &(ttlp->nextp);
770 		ttlp = *pretlp;
771 	}
772 
773 	if (found) {
774 		KMEM_FREE(vp_tlp);
775 	}
776 }
777 
778 /* get vp_tl corresponding to the given name */
779 static vp_tl_t *
780 vnet_get_vptl(vnet_t *vnetp, const char *name)
781 {
782 	vp_tl_t *tlp;
783 
784 	tlp = vnetp->tlp;
785 	while (tlp) {
786 		if (strcmp(tlp->name, name) == 0) {
787 			return (tlp);
788 		}
789 		tlp = tlp->nextp;
790 	}
791 	DWARN((vnetp,
792 	    "vnet_get_vptl: can't find vp_tl with name (%s)\n", name));
793 	return (NULL);
794 }
795 
796 /* read the mac address of the device */
797 static int
798 vnet_read_mac_address(vnet_t *vnetp)
799 {
800 	uchar_t 	*macaddr;
801 	uint32_t 	size;
802 	int 		rv;
803 
804 	rv = ddi_prop_lookup_byte_array(DDI_DEV_T_ANY, vnetp->dip,
805 		DDI_PROP_DONTPASS, macaddr_propname, &macaddr, &size);
806 	if ((rv != DDI_PROP_SUCCESS) || (size != ETHERADDRL)) {
807 		DWARN((vnetp,
808 		"vnet_read_mac_address: prop_lookup failed (%s) err (%d)\n",
809 		macaddr_propname, rv));
810 		return (DDI_FAILURE);
811 	}
812 	bcopy(macaddr, (caddr_t)vnetp->vendor_addr, ETHERADDRL);
813 	bcopy(macaddr, (caddr_t)vnetp->curr_macaddr, ETHERADDRL);
814 	ddi_prop_free(macaddr);
815 
816 	return (DDI_SUCCESS);
817 }
818 
819 
820 /*
821  * Functions below are called only by generic transport to add/remove/modify
822  * entries in forwarding database. See comments in vgen_port_init(vnet_gen.c).
823  */
824 
825 /* add an entry into the forwarding database */
826 void
827 vnet_add_fdb(void *arg, uint8_t *macaddr, mac_tx_t m_tx, void *txarg)
828 {
829 	vnet_t *vnetp = (vnet_t *)arg;
830 	uint32_t fdbhash;
831 	fdb_t *fdbp;
832 	fdb_fanout_t *fdbhp;
833 
834 	/* Calculate hash value and fdb fanout */
835 	fdbhash = MACHASH(macaddr, vnetp->nfdb_hash);
836 	fdbhp = &(vnetp->fdbhp[fdbhash]);
837 
838 	WRITE_ENTER(&fdbhp->rwlock);
839 
840 	fdbp = kmem_zalloc(sizeof (fdb_t), KM_NOSLEEP);
841 	if (fdbp == NULL) {
842 		RW_EXIT(&fdbhp->rwlock);
843 		return;
844 	}
845 	bcopy(macaddr, (caddr_t)fdbp->macaddr, ETHERADDRL);
846 	fdbp->m_tx = m_tx;
847 	fdbp->txarg = txarg;
848 	fdbp->nextp = fdbhp->headp;
849 	fdbhp->headp = fdbp;
850 
851 	RW_EXIT(&fdbhp->rwlock);
852 }
853 
854 /* delete an entry from the forwarding database */
855 void
856 vnet_del_fdb(void *arg, uint8_t *macaddr)
857 {
858 	vnet_t *vnetp = (vnet_t *)arg;
859 	uint32_t fdbhash;
860 	fdb_t *fdbp;
861 	fdb_t **pfdbp;
862 	fdb_fanout_t *fdbhp;
863 
864 	/* Calculate hash value and fdb fanout */
865 	fdbhash = MACHASH(macaddr, vnetp->nfdb_hash);
866 	fdbhp = &(vnetp->fdbhp[fdbhash]);
867 
868 	WRITE_ENTER(&fdbhp->rwlock);
869 
870 	for (pfdbp = &fdbhp->headp; (fdbp  = *pfdbp) != NULL;
871 	    pfdbp = &fdbp->nextp) {
872 		if (bcmp(fdbp->macaddr, macaddr, ETHERADDRL) == 0) {
873 			/* Unlink it from the list */
874 			*pfdbp = fdbp->nextp;
875 			KMEM_FREE(fdbp);
876 			break;
877 		}
878 	}
879 
880 	RW_EXIT(&fdbhp->rwlock);
881 }
882 
883 /* modify an existing entry in the forwarding database */
884 void
885 vnet_modify_fdb(void *arg, uint8_t *macaddr, mac_tx_t m_tx, void *txarg)
886 {
887 	vnet_t *vnetp = (vnet_t *)arg;
888 	uint32_t fdbhash;
889 	fdb_t *fdbp;
890 	fdb_fanout_t *fdbhp;
891 
892 	/* Calculate hash value and fdb fanout */
893 	fdbhash = MACHASH(macaddr, vnetp->nfdb_hash);
894 	fdbhp = &(vnetp->fdbhp[fdbhash]);
895 
896 	WRITE_ENTER(&fdbhp->rwlock);
897 
898 	for (fdbp = fdbhp->headp; fdbp != NULL; fdbp = fdbp->nextp) {
899 		if (bcmp(fdbp->macaddr, macaddr, ETHERADDRL) == 0) {
900 			/* change the entry to have new tx params */
901 			fdbp->m_tx = m_tx;
902 			fdbp->txarg = txarg;
903 			break;
904 		}
905 	}
906 
907 	RW_EXIT(&fdbhp->rwlock);
908 }
909 
910 /* look up an fdb entry based on the mac address, caller holds lock */
911 static fdb_t *
912 vnet_lookup_fdb(fdb_fanout_t *fdbhp, uint8_t *macaddr)
913 {
914 	fdb_t *fdbp = NULL;
915 
916 	for (fdbp = fdbhp->headp; fdbp != NULL; fdbp = fdbp->nextp) {
917 		if (bcmp(fdbp->macaddr, macaddr, ETHERADDRL) == 0) {
918 			break;
919 		}
920 	}
921 
922 	return (fdbp);
923 }
924 
925 /* add default route entry into the forwarding database */
926 void
927 vnet_add_def_rte(void *arg, mac_tx_t m_tx, void *txarg)
928 {
929 	vnet_t *vnetp = (vnet_t *)arg;
930 	fdb_t *fdbp;
931 	fdb_fanout_t *fdbhp;
932 
933 	/*
934 	 * The last hash list is reserved for default route entry,
935 	 * and for now, we have only one entry in this list.
936 	 */
937 	fdbhp = &(vnetp->fdbhp[vnetp->nfdb_hash]);
938 
939 	WRITE_ENTER(&fdbhp->rwlock);
940 
941 	if (fdbhp->headp) {
942 		DWARN((vnetp,
943 		    "vnet_add_def_rte: default rte already exists\n"));
944 		RW_EXIT(&fdbhp->rwlock);
945 		return;
946 	}
947 	fdbp = kmem_zalloc(sizeof (fdb_t), KM_NOSLEEP);
948 	if (fdbp == NULL) {
949 		RW_EXIT(&fdbhp->rwlock);
950 		return;
951 	}
952 	bzero(fdbp->macaddr, ETHERADDRL);
953 	fdbp->m_tx = m_tx;
954 	fdbp->txarg = txarg;
955 	fdbp->nextp = NULL;
956 	fdbhp->headp = fdbp;
957 
958 	RW_EXIT(&fdbhp->rwlock);
959 }
960 
961 /* delete default route entry from the forwarding database */
962 void
963 vnet_del_def_rte(void *arg)
964 {
965 	vnet_t *vnetp = (vnet_t *)arg;
966 	fdb_t *fdbp;
967 	fdb_fanout_t *fdbhp;
968 
969 	/*
970 	 * The last hash list is reserved for default route entry,
971 	 * and for now, we have only one entry in this list.
972 	 */
973 	fdbhp = &(vnetp->fdbhp[vnetp->nfdb_hash]);
974 
975 	WRITE_ENTER(&fdbhp->rwlock);
976 
977 	if (fdbhp->headp == NULL) {
978 		RW_EXIT(&fdbhp->rwlock);
979 		return;
980 	}
981 	fdbp = fdbhp->headp;
982 	KMEM_FREE(fdbp);
983 	fdbhp->headp = NULL;
984 
985 	RW_EXIT(&fdbhp->rwlock);
986 }
987 
988 void
989 vnet_rx(void *arg, mac_resource_handle_t mrh, mblk_t *mp)
990 {
991 	vnet_t *vnetp = arg;
992 	mac_rx(vnetp->mh, mrh, mp);
993 }
994 
995 void
996 vnet_tx_update(void *arg)
997 {
998 	vnet_t *vnetp = arg;
999 	mac_tx_update(vnetp->mh);
1000 }
1001