xref: /titanic_51/usr/src/uts/sun4v/io/vnet.c (revision 5749802bc1ab53eee0631759471dabfc4b455cd4)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <sys/types.h>
30 #include <sys/errno.h>
31 #include <sys/param.h>
32 #include <sys/stream.h>
33 #include <sys/kmem.h>
34 #include <sys/conf.h>
35 #include <sys/devops.h>
36 #include <sys/ksynch.h>
37 #include <sys/stat.h>
38 #include <sys/modctl.h>
39 #include <sys/debug.h>
40 #include <sys/ethernet.h>
41 #include <sys/dlpi.h>
42 #include <net/if.h>
43 #include <sys/mac.h>
44 #include <sys/mac_ether.h>
45 #include <sys/ddi.h>
46 #include <sys/sunddi.h>
47 #include <sys/strsun.h>
48 #include <sys/note.h>
49 #include <sys/vnet.h>
50 
51 /*
52  * Function prototypes.
53  */
54 
/*
 * DDI entrypoints
 * NOTE(review): vnetdevinfo is only declared here; vnetops sets devo_getinfo
 * to NULL and no definition is visible in this file -- confirm whether the
 * prototype is still needed.
 */
static int vnetdevinfo(dev_info_t *, ddi_info_cmd_t, void *, void **);
static int vnetattach(dev_info_t *, ddi_attach_cmd_t);
static int vnetdetach(dev_info_t *, ddi_detach_cmd_t);

/* MAC entrypoints; these are installed in vnet_m_callbacks below */
static int vnet_m_stat(void *, uint_t, uint64_t *);
static int vnet_m_start(void *);
static void vnet_m_stop(void *);
static int vnet_m_promisc(void *, boolean_t);
static int vnet_m_multicst(void *, boolean_t, const uint8_t *);
static int vnet_m_unicst(void *, const uint8_t *);
mblk_t *vnet_m_tx(void *, mblk_t *);

/*
 * vnet internal functions: MAC registration, mac address property lookup,
 * maintenance of the vnet proxy transport (vp_tl_t) list and lookup in the
 * forwarding database (fdb).
 * NOTE(review): vnet_get_vptl has no caller visible in this file -- confirm.
 */
static int vnet_mac_register(vnet_t *);
static int vnet_read_mac_address(vnet_t *vnetp);
static void vnet_add_vptl(vnet_t *vnetp, vp_tl_t *vp_tlp);
static void vnet_del_vptl(vnet_t *vnetp, vp_tl_t *vp_tlp);
static vp_tl_t *vnet_get_vptl(vnet_t *vnetp, const char *devname);
static fdb_t *vnet_lookup_fdb(fdb_fanout_t *fdbhp, uint8_t *macaddr);

/*
 * exported functions: forwarding database / default route maintenance and
 * the receive and tx-update notifications; in each, "arg" is the vnet_t
 * of the instance.
 */
void vnet_add_fdb(void *arg, uint8_t *macaddr, mac_tx_t m_tx, void *txarg);
void vnet_del_fdb(void *arg, uint8_t *macaddr);
void vnet_modify_fdb(void *arg, uint8_t *macaddr, mac_tx_t m_tx, void *txarg);
void vnet_add_def_rte(void *arg, mac_tx_t m_tx, void *txarg);
void vnet_del_def_rte(void *arg);
void vnet_rx(void *arg, mac_resource_handle_t mrh, mblk_t *mp);
void vnet_tx_update(void *arg);

/*
 * externs: init/uninit of the generic transport; called from vnetattach()
 * and vnetdetach().
 */
extern int vgen_init(void *vnetp, dev_info_t *vnetdip, const uint8_t *macaddr,
	mac_register_t **vgenmacp);
extern int vgen_uninit(void *arg);
90 
/*
 * MAC callback vector for vnet; vnet_mac_register() hands it to the MAC
 * layer through the m_callbacks field of the mac_register_t.
 */
static mac_callbacks_t vnet_m_callbacks = {
	0,			/* presumably optional-callback flags; confirm */
	vnet_m_stat,		/* get statistics */
	vnet_m_start,		/* enable transmit/receive */
	vnet_m_stop,		/* stop transmit/receive */
	vnet_m_promisc,		/* set/clear promiscuous mode */
	vnet_m_multicst,	/* enable/disable a multicast address */
	vnet_m_unicst,		/* set the unicast address */
	vnet_m_tx,		/* transmit a chain of packets */
	NULL,			/* remaining callbacks are not provided */
	NULL,
	NULL
};
104 
/*
 * Linked list of "vnet_t" structures - one per instance.
 * vnetattach() pushes a new instance on the head of the list and
 * vnetdetach() unlinks it; both hold vnet_rw as writer while doing so.
 * NOTE(review): no rw_init() of vnet_rw is visible in this file -- confirm
 * that relying on the zero-filled static lock is intended.
 */
static vnet_t	*vnet_headp = NULL;
static krwlock_t vnet_rw;
110 
/*
 * Tunables
 * Of these, only vnet_nfdb_hash is read in this file (vnetattach() range
 * checks it before sizing the fdb hash table).  The others are presumably
 * consumed by the generic transport -- TODO confirm.
 */
uint32_t vnet_ntxds = VNET_NTXDS;	/* power of 2 transmit descriptors */
uint32_t vnet_reclaim_lowat = VNET_RECLAIM_LOWAT;  /* tx recl low watermark */
uint32_t vnet_reclaim_hiwat = VNET_RECLAIM_HIWAT;  /* tx recl high watermark */
uint32_t vnet_ldcwd_interval = VNET_LDCWD_INTERVAL; /* watchdog freq in msec */
uint32_t vnet_ldcwd_txtimeout = VNET_LDCWD_TXTIMEOUT;  /* tx timeout in msec */
uint32_t vnet_ldc_mtu = VNET_LDC_MTU;		/* ldc mtu */
uint32_t vnet_nfdb_hash = VNET_NFDB_HASH;	/* size of fdb hash table */
uint32_t vnet_nrbufs = VNET_NRBUFS;	/* number of receive buffers */
120 
/*
 * Property names
 * macaddr_propname is the byte-array property that
 * vnet_read_mac_address() looks up to obtain the device's mac address.
 */
static char macaddr_propname[] = "local-mac-address";
125 
/*
 * This is the string displayed by modinfo(1m).
 * It is installed as the ID string of modldrv below.
 * NOTE(review): "%I%" looks like an SCCS keyword expanded at checkout;
 * confirm it is substituted in delivered builds.
 */
static char vnet_ident[] = "vnet driver v%I%";
extern struct mod_ops mod_driverops;
/*
 * Character/block entry points for the driver; referenced from
 * vnetops.devo_cb_ops.  Open and close are nulldev, property operations
 * go through ddi_prop_op, and every other entry is nodev/nochpoll.
 */
static struct cb_ops cb_vnetops = {
	nulldev,		/* cb_open */
	nulldev,		/* cb_close */
	nodev,			/* cb_strategy */
	nodev,			/* cb_print */
	nodev,			/* cb_dump */
	nodev,			/* cb_read */
	nodev,			/* cb_write */
	nodev,			/* cb_ioctl */
	nodev,			/* cb_devmap */
	nodev,			/* cb_mmap */
	nodev,			/* cb_segmap */
	nochpoll,		/* cb_chpoll */
	ddi_prop_op,		/* cb_prop_op */
	NULL,			/* cb_stream */
	(int)(D_MP)		/* cb_flag */
};
148 
/*
 * Device operations for the driver.  Only attach and detach are
 * implemented here; devo_getinfo is left NULL.  _init() and _fini() pass
 * this structure to mac_init_ops() and mac_fini_ops() respectively.
 */
static struct dev_ops vnetops = {
	DEVO_REV,		/* devo_rev */
	0,			/* devo_refcnt */
	NULL,			/* devo_getinfo */
	nulldev,		/* devo_identify */
	nulldev,		/* devo_probe */
	vnetattach,		/* devo_attach */
	vnetdetach,		/* devo_detach */
	nodev,			/* devo_reset */
	&cb_vnetops,		/* devo_cb_ops */
	(struct bus_ops *)NULL	/* devo_bus_ops */
};
161 
/* Driver linkage: ties the module type, ident string and dev_ops together */
static struct modldrv modldrv = {
	&mod_driverops,		/* Type of module.  This one is a driver */
	vnet_ident,		/* ID string */
	&vnetops		/* driver specific ops */
};
167 
/* Module linkage used by _init(), _fini() and _info() */
static struct modlinkage modlinkage = {
	MODREV_1, (void *)&modldrv, NULL
};
171 
172 
/*
 * Print debug messages - set to 0xf to enable all msgs.
 * The value is a bit mask of the DBG_LEVELn values tested by the
 * DBG1/DBG2/DWARN/DERR macros in DEBUG builds; the default of 0x8
 * corresponds to DBG_LEVEL4 (error messages) only.
 */
int _vnet_dbglevel = 0x8;
177 
178 void
179 _vnetdebug_printf(void *arg, const char *fmt, ...)
180 {
181 	char    buf[512];
182 	va_list ap;
183 	vnet_t *vnetp = (vnet_t *)arg;
184 
185 	va_start(ap, fmt);
186 	(void) vsprintf(buf, fmt, ap);
187 	va_end(ap);
188 
189 	if (vnetp == NULL)
190 		cmn_err(CE_CONT, "%s\n", buf);
191 	else
192 		cmn_err(CE_CONT, "vnet%d: %s\n", vnetp->instance, buf);
193 }
194 
#ifdef DEBUG

/*
 * XXX: any changes to the definitions below need corresponding changes in
 * vnet_gen.c
 */

/*
 * debug levels:
 * DBG_LEVEL1:	Function entry/exit tracing
 * DBG_LEVEL2:	Info messages
 * DBG_LEVEL3:	Warning messages
 * DBG_LEVEL4:	Error messages
 */

enum	{ DBG_LEVEL1 = 0x01, DBG_LEVEL2 = 0x02, DBG_LEVEL3 = 0x04,
	    DBG_LEVEL4 = 0x08 };

/*
 * Each macro takes a fully parenthesized argument list for
 * _vnetdebug_printf(), e.g. DBG1((vnetp, "fmt %d\n", x)), and prints the
 * message only if the matching bit is set in _vnet_dbglevel.
 */
#define	DBG1(_s)	do {						\
			    if ((_vnet_dbglevel & DBG_LEVEL1) != 0) {	\
					_vnetdebug_printf _s;		\
			    }					\
			_NOTE(CONSTCOND) } while (0)

#define	DBG2(_s)	do {						\
			    if ((_vnet_dbglevel & DBG_LEVEL2) != 0) {	\
					_vnetdebug_printf _s;		\
			    }					\
			_NOTE(CONSTCOND) } while (0)

#define	DWARN(_s)	do {						\
			    if ((_vnet_dbglevel & DBG_LEVEL3) != 0) {	\
					_vnetdebug_printf _s;		\
			    }					\
			_NOTE(CONSTCOND) } while (0)

#define	DERR(_s)	do {						\
			    if ((_vnet_dbglevel & DBG_LEVEL4) != 0) {	\
					_vnetdebug_printf _s;		\
			    }					\
			_NOTE(CONSTCOND) } while (0)

#else

/*
 * Non-DEBUG builds: the call is kept under "if (0)" so that the arguments
 * are still parsed (and variables used only for debugging stay referenced)
 * but no message is ever printed.  The do { } while (0) wrapper makes each
 * macro a single statement, so that an "else" following a macro invocation
 * cannot bind to the macro's internal "if".
 */
#define	DBG1(_s)	do {						\
			    if (0) {					\
					_vnetdebug_printf _s;		\
			    }					\
			_NOTE(CONSTCOND) } while (0)

#define	DBG2(_s)	do {						\
			    if (0) {					\
					_vnetdebug_printf _s;		\
			    }					\
			_NOTE(CONSTCOND) } while (0)

#define	DWARN(_s)	do {						\
			    if (0) {					\
					_vnetdebug_printf _s;		\
			    }					\
			_NOTE(CONSTCOND) } while (0)

#define	DERR(_s)	do {						\
			    if (0) {					\
					_vnetdebug_printf _s;		\
			    }					\
			_NOTE(CONSTCOND) } while (0)

#endif
245 
246 /* _init(9E): initialize the loadable module */
247 int
248 _init(void)
249 {
250 	int status;
251 
252 	DBG1((NULL, "_init: enter\n"));
253 
254 	mac_init_ops(&vnetops, "vnet");
255 	status = mod_install(&modlinkage);
256 	if (status != 0) {
257 		mac_fini_ops(&vnetops);
258 	}
259 
260 	DBG1((NULL, "_init: exit\n"));
261 	return (status);
262 }
263 
264 /* _fini(9E): prepare the module for unloading. */
265 int
266 _fini(void)
267 {
268 	int status;
269 
270 	DBG1((NULL, "_fini: enter\n"));
271 
272 	status = mod_remove(&modlinkage);
273 	if (status != 0)
274 		return (status);
275 	mac_fini_ops(&vnetops);
276 
277 	DBG1((NULL, "_fini: exit\n"));
278 	return (status);
279 }
280 
281 /* _info(9E): return information about the loadable module */
282 int
283 _info(struct modinfo *modinfop)
284 {
285 	return (mod_info(&modlinkage, modinfop));
286 }
287 
288 /*
289  * attach(9E): attach a device to the system.
290  * called once for each instance of the device on the system.
291  */
292 static int
293 vnetattach(dev_info_t *dip, ddi_attach_cmd_t cmd)
294 {
295 	vnet_t		*vnetp;
296 	vp_tl_t		*vp_tlp;
297 	int		instance;
298 	int		status;
299 	enum		{ AST_init = 0x0, AST_vnet_alloc = 0x1,
300 			    AST_mac_alloc = 0x2, AST_read_macaddr = 0x4,
301 			    AST_vgen_init = 0x8, AST_vptl_alloc = 0x10,
302 			    AST_fdbh_alloc = 0x20 }
303 			attach_state;
304 	mac_register_t	*vgenmacp = NULL;
305 	uint32_t	nfdbh = 0;
306 
307 	attach_state = AST_init;
308 
309 	switch (cmd) {
310 	case DDI_ATTACH:
311 		break;
312 	case DDI_RESUME:
313 	case DDI_PM_RESUME:
314 	default:
315 		goto vnet_attach_fail;
316 	}
317 
318 	instance = ddi_get_instance(dip);
319 	DBG1((NULL, "vnetattach: instance(%d) enter\n", instance));
320 
321 	/* allocate vnet_t and mac_t structures */
322 	vnetp = kmem_zalloc(sizeof (vnet_t), KM_SLEEP);
323 	attach_state |= AST_vnet_alloc;
324 
325 	/* setup links to vnet_t from both devinfo and mac_t */
326 	ddi_set_driver_private(dip, (caddr_t)vnetp);
327 	vnetp->dip = dip;
328 	vnetp->instance = instance;
329 
330 	/* read the mac address */
331 	status = vnet_read_mac_address(vnetp);
332 	if (status != DDI_SUCCESS) {
333 		goto vnet_attach_fail;
334 	}
335 	attach_state |= AST_read_macaddr;
336 
337 	/*
338 	 * Initialize the generic vnet proxy transport. This is the first
339 	 * and default transport used by vnet. The generic transport
340 	 * is provided by using sun4v LDC (logical domain channel). On success,
341 	 * vgen_init() provides a pointer to mac_t of generic transport.
342 	 * Currently, this generic layer provides network connectivity to other
343 	 * vnets within ldoms and also to remote hosts oustide ldoms through
344 	 * the virtual switch (vsw) device on domain0. In the future, when
345 	 * physical adapters that are able to share their resources (such as
346 	 * dma channels) with guest domains become available, the vnet device
347 	 * will use hardware specific driver to communicate directly over the
348 	 * physical device to reach remote hosts without going through vswitch.
349 	 */
350 	status = vgen_init(vnetp, vnetp->dip, (uint8_t *)vnetp->curr_macaddr,
351 	    &vgenmacp);
352 	if (status != DDI_SUCCESS) {
353 		DERR((vnetp, "vgen_init() failed\n"));
354 		goto vnet_attach_fail;
355 	}
356 	attach_state |= AST_vgen_init;
357 
358 	vp_tlp = kmem_zalloc(sizeof (vp_tl_t), KM_SLEEP);
359 	vp_tlp->macp = vgenmacp;
360 	(void) snprintf(vp_tlp->name, MAXNAMELEN, "%s%u", "vgen", instance);
361 	(void) strcpy(vnetp->vgen_name, vp_tlp->name);
362 
363 	/* add generic transport to the list of vnet proxy transports */
364 	vnet_add_vptl(vnetp, vp_tlp);
365 	attach_state |= AST_vptl_alloc;
366 
367 	nfdbh = vnet_nfdb_hash;
368 	if ((nfdbh < VNET_NFDB_HASH) || (nfdbh > VNET_NFDB_HASH_MAX)) {
369 		vnetp->nfdb_hash = VNET_NFDB_HASH;
370 	}
371 	else
372 		vnetp->nfdb_hash = nfdbh;
373 
374 	/* allocate fdb hash table, with an extra slot for default route */
375 	vnetp->fdbhp = kmem_zalloc(sizeof (fdb_fanout_t) *
376 	    (vnetp->nfdb_hash + 1), KM_SLEEP);
377 	attach_state |= AST_fdbh_alloc;
378 
379 	/* register with MAC layer */
380 	status = vnet_mac_register(vnetp);
381 	if (status != DDI_SUCCESS) {
382 		goto vnet_attach_fail;
383 	}
384 
385 	/* add to the list of vnet devices */
386 	WRITE_ENTER(&vnet_rw);
387 	vnetp->nextp = vnet_headp;
388 	vnet_headp = vnetp;
389 	RW_EXIT(&vnet_rw);
390 
391 	DBG1((NULL, "vnetattach: instance(%d) exit\n", instance));
392 	return (DDI_SUCCESS);
393 
394 vnet_attach_fail:
395 	if (attach_state & AST_fdbh_alloc) {
396 		kmem_free(vnetp->fdbhp,
397 		    sizeof (fdb_fanout_t) * (vnetp->nfdb_hash + 1));
398 	}
399 	if (attach_state & AST_vptl_alloc) {
400 		WRITE_ENTER(&vnetp->trwlock);
401 		vnet_del_vptl(vnetp, vp_tlp);
402 		RW_EXIT(&vnetp->trwlock);
403 	}
404 	if (attach_state & AST_vgen_init) {
405 		(void) vgen_uninit(vgenmacp->m_driver);
406 	}
407 	if (attach_state & AST_vnet_alloc) {
408 		KMEM_FREE(vnetp);
409 	}
410 	return (DDI_FAILURE);
411 }
412 
413 /*
414  * detach(9E): detach a device from the system.
415  */
416 static int
417 vnetdetach(dev_info_t *dip, ddi_detach_cmd_t cmd)
418 {
419 	vnet_t		*vnetp;
420 	vnet_t		**vnetpp;
421 	vp_tl_t		*vp_tlp;
422 	int		instance;
423 	int		rv;
424 
425 	instance = ddi_get_instance(dip);
426 	DBG1((NULL, "vnetdetach: instance(%d) enter\n", instance));
427 
428 	vnetp = ddi_get_driver_private(dip);
429 	if (vnetp == NULL) {
430 		goto vnet_detach_fail;
431 	}
432 
433 	switch (cmd) {
434 	case DDI_DETACH:
435 		break;
436 	case DDI_SUSPEND:
437 	case DDI_PM_SUSPEND:
438 	default:
439 		goto vnet_detach_fail;
440 	}
441 
442 	/* uninit and free vnet proxy transports */
443 	WRITE_ENTER(&vnetp->trwlock);
444 	while ((vp_tlp = vnetp->tlp) != NULL) {
445 		if (strcmp(vnetp->vgen_name, vp_tlp->name) == 0) {
446 			/* uninitialize generic transport */
447 			rv = vgen_uninit(vp_tlp->macp->m_driver);
448 			if (rv != DDI_SUCCESS) {
449 				RW_EXIT(&vnetp->trwlock);
450 				goto vnet_detach_fail;
451 			}
452 		}
453 		vnet_del_vptl(vnetp, vp_tlp);
454 	}
455 	RW_EXIT(&vnetp->trwlock);
456 
457 	/*
458 	 * Unregister from the MAC subsystem.  This can fail, in
459 	 * particular if there are DLPI style-2 streams still open -
460 	 * in which case we just return failure.
461 	 */
462 	if (mac_unregister(vnetp->mh) != 0)
463 		goto vnet_detach_fail;
464 
465 	/* unlink from instance(vnet_t) list */
466 	WRITE_ENTER(&vnet_rw);
467 	for (vnetpp = &vnet_headp; *vnetpp; vnetpp = &(*vnetpp)->nextp) {
468 		if (*vnetpp == vnetp) {
469 			*vnetpp = vnetp->nextp;
470 			break;
471 		}
472 	}
473 	RW_EXIT(&vnet_rw);
474 
475 	KMEM_FREE(vnetp);
476 
477 	return (DDI_SUCCESS);
478 
479 vnet_detach_fail:
480 	return (DDI_FAILURE);
481 }
482 
483 /* enable the device for transmit/receive */
484 static int
485 vnet_m_start(void *arg)
486 {
487 	vnet_t		*vnetp = arg;
488 	vp_tl_t		*vp_tlp;
489 	mac_register_t	*vp_macp;
490 	mac_callbacks_t	*cbp;
491 
492 	DBG1((vnetp, "vnet_m_start: enter\n"));
493 
494 	/*
495 	 * XXX
496 	 * Currently, we only have generic transport. m_start() invokes
497 	 * vgen_start() which enables ports/channels in vgen and
498 	 * initiates handshake with peer vnets and vsw. In the future when we
499 	 * have support for hardware specific transports, this information
500 	 * needs to be propagted back to vnet from vgen and we need to revisit
501 	 * this code (see comments in vnet_attach()).
502 	 *
503 	 */
504 	WRITE_ENTER(&vnetp->trwlock);
505 	for (vp_tlp = vnetp->tlp; vp_tlp != NULL; vp_tlp = vp_tlp->nextp) {
506 		vp_macp = vp_tlp->macp;
507 		cbp = vp_macp->m_callbacks;
508 		cbp->mc_start(vp_macp->m_driver);
509 	}
510 	RW_EXIT(&vnetp->trwlock);
511 
512 	DBG1((vnetp, "vnet_m_start: exit\n"));
513 	return (VNET_SUCCESS);
514 
515 }
516 
517 /* stop transmit/receive for the device */
518 static void
519 vnet_m_stop(void *arg)
520 {
521 	vnet_t		*vnetp = arg;
522 	vp_tl_t		*vp_tlp;
523 	mac_register_t	*vp_macp;
524 	mac_callbacks_t	*cbp;
525 
526 	DBG1((vnetp, "vnet_m_stop: enter\n"));
527 
528 	WRITE_ENTER(&vnetp->trwlock);
529 	for (vp_tlp = vnetp->tlp; vp_tlp != NULL; vp_tlp = vp_tlp->nextp) {
530 		vp_macp = vp_tlp->macp;
531 		cbp = vp_macp->m_callbacks;
532 		cbp->mc_stop(vp_macp->m_driver);
533 	}
534 	RW_EXIT(&vnetp->trwlock);
535 
536 	DBG1((vnetp, "vnet_m_stop: exit\n"));
537 }
538 
539 /* set the unicast mac address of the device */
540 static int
541 vnet_m_unicst(void *arg, const uint8_t *macaddr)
542 {
543 	_NOTE(ARGUNUSED(macaddr))
544 
545 	vnet_t *vnetp = arg;
546 
547 	DBG1((vnetp, "vnet_m_unicst: enter\n"));
548 	/*
549 	 * XXX: setting mac address dynamically is not supported.
550 	 */
551 	DBG1((vnetp, "vnet_m_unicst: exit\n"));
552 
553 	return (VNET_FAILURE);
554 }
555 
556 /* enable/disable a multicast address */
557 static int
558 vnet_m_multicst(void *arg, boolean_t add, const uint8_t *mca)
559 {
560 	_NOTE(ARGUNUSED(add, mca))
561 
562 	vnet_t *vnetp = arg;
563 	vp_tl_t		*vp_tlp;
564 	mac_register_t	*vp_macp;
565 	mac_callbacks_t	*cbp;
566 	int rv = VNET_SUCCESS;
567 
568 	DBG1((vnetp, "vnet_m_multicst: enter\n"));
569 	READ_ENTER(&vnetp->trwlock);
570 	for (vp_tlp = vnetp->tlp; vp_tlp != NULL; vp_tlp = vp_tlp->nextp) {
571 		if (strcmp(vnetp->vgen_name, vp_tlp->name) == 0) {
572 			vp_macp = vp_tlp->macp;
573 			cbp = vp_macp->m_callbacks;
574 			rv = cbp->mc_multicst(vp_macp->m_driver, add, mca);
575 			break;
576 		}
577 	}
578 	RW_EXIT(&vnetp->trwlock);
579 	DBG1((vnetp, "vnet_m_multicst: exit\n"));
580 	return (rv);
581 }
582 
583 /* set or clear promiscuous mode on the device */
584 static int
585 vnet_m_promisc(void *arg, boolean_t on)
586 {
587 	_NOTE(ARGUNUSED(on))
588 
589 	vnet_t *vnetp = arg;
590 	DBG1((vnetp, "vnet_m_promisc: enter\n"));
591 	/*
592 	 * XXX: setting promiscuous mode is not supported, just return success.
593 	 */
594 	DBG1((vnetp, "vnet_m_promisc: exit\n"));
595 	return (VNET_SUCCESS);
596 }
597 
598 /*
599  * Transmit a chain of packets. This function provides switching functionality
600  * based on the destination mac address to reach other guests (within ldoms) or
601  * external hosts.
602  */
603 mblk_t *
604 vnet_m_tx(void *arg, mblk_t *mp)
605 {
606 	vnet_t *vnetp;
607 	mblk_t *next;
608 	uint32_t fdbhash;
609 	fdb_t *fdbp;
610 	fdb_fanout_t *fdbhp;
611 	struct ether_header *ehp;
612 	uint8_t *macaddr;
613 	mblk_t *resid_mp;
614 
615 	vnetp = (vnet_t *)arg;
616 	DBG1((vnetp, "vnet_m_tx: enter\n"));
617 	ASSERT(mp != NULL);
618 
619 	while (mp != NULL) {
620 		next = mp->b_next;
621 		mp->b_next = NULL;
622 
623 		/* get the destination mac address in the eth header */
624 		ehp = (struct ether_header *)mp->b_rptr;
625 		macaddr = (uint8_t *)&ehp->ether_dhost;
626 
627 		/* Calculate hash value and fdb fanout */
628 		fdbhash = MACHASH(macaddr, vnetp->nfdb_hash);
629 		fdbhp = &(vnetp->fdbhp[fdbhash]);
630 
631 		READ_ENTER(&fdbhp->rwlock);
632 		fdbp = vnet_lookup_fdb(fdbhp, macaddr);
633 		if (fdbp) {
634 			/*
635 			 * If the destination is in FDB, the destination is
636 			 * a vnet device within ldoms and directly reachable,
637 			 * invoke the tx function in the fdb entry.
638 			 */
639 			resid_mp = fdbp->m_tx(fdbp->txarg, mp);
640 			if (resid_mp != NULL) {
641 				/* m_tx failed */
642 				mp->b_next = next;
643 				RW_EXIT(&fdbhp->rwlock);
644 				break;
645 			}
646 			RW_EXIT(&fdbhp->rwlock);
647 		} else {
648 			/* destination is not in FDB */
649 			RW_EXIT(&fdbhp->rwlock);
650 			/*
651 			 * If the destination is broadcast/multicast
652 			 * or an unknown unicast address, forward the
653 			 * packet to vsw, using the last slot in fdb which is
654 			 * reserved for default route.
655 			 */
656 			fdbhp = &(vnetp->fdbhp[vnetp->nfdb_hash]);
657 			READ_ENTER(&fdbhp->rwlock);
658 			fdbp = fdbhp->headp;
659 			if (fdbp) {
660 				resid_mp = fdbp->m_tx(fdbp->txarg, mp);
661 				if (resid_mp != NULL) {
662 					/* m_tx failed */
663 					mp->b_next = next;
664 					RW_EXIT(&fdbhp->rwlock);
665 					break;
666 				}
667 			} else {
668 				/* drop the packet */
669 				freemsg(mp);
670 			}
671 			RW_EXIT(&fdbhp->rwlock);
672 		}
673 
674 		mp = next;
675 	}
676 
677 	DBG1((vnetp, "vnet_m_tx: exit\n"));
678 	return (mp);
679 }
680 
681 /* get statistics from the device */
682 int
683 vnet_m_stat(void *arg, uint_t stat, uint64_t *val)
684 {
685 	vnet_t *vnetp = arg;
686 	vp_tl_t	*vp_tlp;
687 	mac_register_t	*vp_macp;
688 	mac_callbacks_t	*cbp;
689 	uint64_t val_total = 0;
690 
691 	DBG1((vnetp, "vnet_m_stat: enter\n"));
692 
693 	/*
694 	 * get the specified statistic from each transport and return the
695 	 * aggregate val.  This obviously only works for counters.
696 	 */
697 	if ((IS_MAC_STAT(stat) && !MAC_STAT_ISACOUNTER(stat)) ||
698 	    (IS_MACTYPE_STAT(stat) && !ETHER_STAT_ISACOUNTER(stat))) {
699 		return (ENOTSUP);
700 	}
701 	READ_ENTER(&vnetp->trwlock);
702 	for (vp_tlp = vnetp->tlp; vp_tlp != NULL; vp_tlp = vp_tlp->nextp) {
703 		vp_macp = vp_tlp->macp;
704 		cbp = vp_macp->m_callbacks;
705 		if (cbp->mc_getstat(vp_macp->m_driver, stat, val) == 0)
706 			val_total += *val;
707 	}
708 	RW_EXIT(&vnetp->trwlock);
709 
710 	*val = val_total;
711 
712 	DBG1((vnetp, "vnet_m_stat: exit\n"));
713 	return (0);
714 }
715 
716 /* wrapper function for mac_register() */
717 static int
718 vnet_mac_register(vnet_t *vnetp)
719 {
720 	mac_register_t	*macp;
721 	int		err;
722 
723 	if ((macp = mac_alloc(MAC_VERSION)) == NULL)
724 		return (DDI_FAILURE);
725 	macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
726 	macp->m_driver = vnetp;
727 	macp->m_dip = vnetp->dip;
728 	macp->m_src_addr = vnetp->curr_macaddr;
729 	macp->m_callbacks = &vnet_m_callbacks;
730 	macp->m_min_sdu = 0;
731 	macp->m_max_sdu = ETHERMTU;
732 
733 	/*
734 	 * Finally, we're ready to register ourselves with the MAC layer
735 	 * interface; if this succeeds, we're all ready to start()
736 	 */
737 	err = mac_register(macp, &vnetp->mh);
738 	mac_free(macp);
739 	return (err == 0 ? DDI_SUCCESS : DDI_FAILURE);
740 }
741 
742 /* add vp_tl to the list */
743 static void
744 vnet_add_vptl(vnet_t *vnetp, vp_tl_t *vp_tlp)
745 {
746 	vp_tl_t *ttlp;
747 
748 	WRITE_ENTER(&vnetp->trwlock);
749 	if (vnetp->tlp == NULL) {
750 		vnetp->tlp = vp_tlp;
751 	} else {
752 		ttlp = vnetp->tlp;
753 		while (ttlp->nextp)
754 			ttlp = ttlp->nextp;
755 		ttlp->nextp = vp_tlp;
756 	}
757 	RW_EXIT(&vnetp->trwlock);
758 }
759 
760 /* remove vp_tl from the list */
761 static void
762 vnet_del_vptl(vnet_t *vnetp, vp_tl_t *vp_tlp)
763 {
764 	vp_tl_t *ttlp, **pretlp;
765 	boolean_t found = B_FALSE;
766 
767 	pretlp = &vnetp->tlp;
768 	ttlp = *pretlp;
769 	while (ttlp) {
770 		if (ttlp == vp_tlp) {
771 			found = B_TRUE;
772 			(*pretlp) = ttlp->nextp;
773 			ttlp->nextp = NULL;
774 			break;
775 		}
776 		pretlp = &(ttlp->nextp);
777 		ttlp = *pretlp;
778 	}
779 
780 	if (found) {
781 		KMEM_FREE(vp_tlp);
782 	}
783 }
784 
785 /* get vp_tl corresponding to the given name */
786 static vp_tl_t *
787 vnet_get_vptl(vnet_t *vnetp, const char *name)
788 {
789 	vp_tl_t *tlp;
790 
791 	tlp = vnetp->tlp;
792 	while (tlp) {
793 		if (strcmp(tlp->name, name) == 0) {
794 			return (tlp);
795 		}
796 		tlp = tlp->nextp;
797 	}
798 	DWARN((vnetp,
799 	    "vnet_get_vptl: can't find vp_tl with name (%s)\n", name));
800 	return (NULL);
801 }
802 
803 /* read the mac address of the device */
804 static int
805 vnet_read_mac_address(vnet_t *vnetp)
806 {
807 	uchar_t 	*macaddr;
808 	uint32_t 	size;
809 	int 		rv;
810 
811 	rv = ddi_prop_lookup_byte_array(DDI_DEV_T_ANY, vnetp->dip,
812 		DDI_PROP_DONTPASS, macaddr_propname, &macaddr, &size);
813 	if ((rv != DDI_PROP_SUCCESS) || (size != ETHERADDRL)) {
814 		DWARN((vnetp,
815 		"vnet_read_mac_address: prop_lookup failed (%s) err (%d)\n",
816 		macaddr_propname, rv));
817 		return (DDI_FAILURE);
818 	}
819 	bcopy(macaddr, (caddr_t)vnetp->vendor_addr, ETHERADDRL);
820 	bcopy(macaddr, (caddr_t)vnetp->curr_macaddr, ETHERADDRL);
821 	ddi_prop_free(macaddr);
822 
823 	return (DDI_SUCCESS);
824 }
825 
826 
827 /*
828  * Functions below are called only by generic transport to add/remove/modify
829  * entries in forwarding database. See comments in vgen_port_init(vnet_gen.c).
830  */
831 
832 /* add an entry into the forwarding database */
833 void
834 vnet_add_fdb(void *arg, uint8_t *macaddr, mac_tx_t m_tx, void *txarg)
835 {
836 	vnet_t *vnetp = (vnet_t *)arg;
837 	uint32_t fdbhash;
838 	fdb_t *fdbp;
839 	fdb_fanout_t *fdbhp;
840 
841 	/* Calculate hash value and fdb fanout */
842 	fdbhash = MACHASH(macaddr, vnetp->nfdb_hash);
843 	fdbhp = &(vnetp->fdbhp[fdbhash]);
844 
845 	WRITE_ENTER(&fdbhp->rwlock);
846 
847 	fdbp = kmem_zalloc(sizeof (fdb_t), KM_NOSLEEP);
848 	if (fdbp == NULL) {
849 		RW_EXIT(&fdbhp->rwlock);
850 		return;
851 	}
852 	bcopy(macaddr, (caddr_t)fdbp->macaddr, ETHERADDRL);
853 	fdbp->m_tx = m_tx;
854 	fdbp->txarg = txarg;
855 	fdbp->nextp = fdbhp->headp;
856 	fdbhp->headp = fdbp;
857 
858 	RW_EXIT(&fdbhp->rwlock);
859 }
860 
861 /* delete an entry from the forwarding database */
862 void
863 vnet_del_fdb(void *arg, uint8_t *macaddr)
864 {
865 	vnet_t *vnetp = (vnet_t *)arg;
866 	uint32_t fdbhash;
867 	fdb_t *fdbp;
868 	fdb_t **pfdbp;
869 	fdb_fanout_t *fdbhp;
870 
871 	/* Calculate hash value and fdb fanout */
872 	fdbhash = MACHASH(macaddr, vnetp->nfdb_hash);
873 	fdbhp = &(vnetp->fdbhp[fdbhash]);
874 
875 	WRITE_ENTER(&fdbhp->rwlock);
876 
877 	for (pfdbp = &fdbhp->headp; (fdbp  = *pfdbp) != NULL;
878 	    pfdbp = &fdbp->nextp) {
879 		if (bcmp(fdbp->macaddr, macaddr, ETHERADDRL) == 0) {
880 			/* Unlink it from the list */
881 			*pfdbp = fdbp->nextp;
882 			KMEM_FREE(fdbp);
883 			break;
884 		}
885 	}
886 
887 	RW_EXIT(&fdbhp->rwlock);
888 }
889 
890 /* modify an existing entry in the forwarding database */
891 void
892 vnet_modify_fdb(void *arg, uint8_t *macaddr, mac_tx_t m_tx, void *txarg)
893 {
894 	vnet_t *vnetp = (vnet_t *)arg;
895 	uint32_t fdbhash;
896 	fdb_t *fdbp;
897 	fdb_fanout_t *fdbhp;
898 
899 	/* Calculate hash value and fdb fanout */
900 	fdbhash = MACHASH(macaddr, vnetp->nfdb_hash);
901 	fdbhp = &(vnetp->fdbhp[fdbhash]);
902 
903 	WRITE_ENTER(&fdbhp->rwlock);
904 
905 	for (fdbp = fdbhp->headp; fdbp != NULL; fdbp = fdbp->nextp) {
906 		if (bcmp(fdbp->macaddr, macaddr, ETHERADDRL) == 0) {
907 			/* change the entry to have new tx params */
908 			fdbp->m_tx = m_tx;
909 			fdbp->txarg = txarg;
910 			break;
911 		}
912 	}
913 
914 	RW_EXIT(&fdbhp->rwlock);
915 }
916 
917 /* look up an fdb entry based on the mac address, caller holds lock */
918 static fdb_t *
919 vnet_lookup_fdb(fdb_fanout_t *fdbhp, uint8_t *macaddr)
920 {
921 	fdb_t *fdbp = NULL;
922 
923 	for (fdbp = fdbhp->headp; fdbp != NULL; fdbp = fdbp->nextp) {
924 		if (bcmp(fdbp->macaddr, macaddr, ETHERADDRL) == 0) {
925 			break;
926 		}
927 	}
928 
929 	return (fdbp);
930 }
931 
932 /* add default route entry into the forwarding database */
933 void
934 vnet_add_def_rte(void *arg, mac_tx_t m_tx, void *txarg)
935 {
936 	vnet_t *vnetp = (vnet_t *)arg;
937 	fdb_t *fdbp;
938 	fdb_fanout_t *fdbhp;
939 
940 	/*
941 	 * The last hash list is reserved for default route entry,
942 	 * and for now, we have only one entry in this list.
943 	 */
944 	fdbhp = &(vnetp->fdbhp[vnetp->nfdb_hash]);
945 
946 	WRITE_ENTER(&fdbhp->rwlock);
947 
948 	if (fdbhp->headp) {
949 		DWARN((vnetp,
950 		    "vnet_add_def_rte: default rte already exists\n"));
951 		RW_EXIT(&fdbhp->rwlock);
952 		return;
953 	}
954 	fdbp = kmem_zalloc(sizeof (fdb_t), KM_NOSLEEP);
955 	if (fdbp == NULL) {
956 		RW_EXIT(&fdbhp->rwlock);
957 		return;
958 	}
959 	bzero(fdbp->macaddr, ETHERADDRL);
960 	fdbp->m_tx = m_tx;
961 	fdbp->txarg = txarg;
962 	fdbp->nextp = NULL;
963 	fdbhp->headp = fdbp;
964 
965 	RW_EXIT(&fdbhp->rwlock);
966 }
967 
968 /* delete default route entry from the forwarding database */
969 void
970 vnet_del_def_rte(void *arg)
971 {
972 	vnet_t *vnetp = (vnet_t *)arg;
973 	fdb_t *fdbp;
974 	fdb_fanout_t *fdbhp;
975 
976 	/*
977 	 * The last hash list is reserved for default route entry,
978 	 * and for now, we have only one entry in this list.
979 	 */
980 	fdbhp = &(vnetp->fdbhp[vnetp->nfdb_hash]);
981 
982 	WRITE_ENTER(&fdbhp->rwlock);
983 
984 	if (fdbhp->headp == NULL) {
985 		RW_EXIT(&fdbhp->rwlock);
986 		return;
987 	}
988 	fdbp = fdbhp->headp;
989 	KMEM_FREE(fdbp);
990 	fdbhp->headp = NULL;
991 
992 	RW_EXIT(&fdbhp->rwlock);
993 }
994 
995 void
996 vnet_rx(void *arg, mac_resource_handle_t mrh, mblk_t *mp)
997 {
998 	vnet_t *vnetp = arg;
999 	mac_rx(vnetp->mh, mrh, mp);
1000 }
1001 
1002 void
1003 vnet_tx_update(void *arg)
1004 {
1005 	vnet_t *vnetp = arg;
1006 	mac_tx_update(vnetp->mh);
1007 }
1008