xref: /titanic_52/usr/src/uts/sun4v/io/vnet.c (revision 5bb86dd8f405a48942aaaab3ca1f410ed7e6db4d)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <sys/types.h>
30 #include <sys/errno.h>
31 #include <sys/param.h>
32 #include <sys/stream.h>
33 #include <sys/kmem.h>
34 #include <sys/conf.h>
35 #include <sys/devops.h>
36 #include <sys/ksynch.h>
37 #include <sys/stat.h>
38 #include <sys/modctl.h>
39 #include <sys/debug.h>
40 #include <sys/ethernet.h>
41 #include <sys/dlpi.h>
42 #include <net/if.h>
43 #include <sys/mac.h>
44 #include <sys/mac_ether.h>
45 #include <sys/ddi.h>
46 #include <sys/sunddi.h>
47 #include <sys/strsun.h>
48 #include <sys/note.h>
49 #include <sys/vnet.h>
50 
51 /*
52  * Function prototypes.
53  */
54 
55 /* DDI entrypoints */
56 static int vnetdevinfo(dev_info_t *, ddi_info_cmd_t, void *, void **);
57 static int vnetattach(dev_info_t *, ddi_attach_cmd_t);
58 static int vnetdetach(dev_info_t *, ddi_detach_cmd_t);
59 
60 /* MAC entrypoints  */
61 static int vnet_m_stat(void *, uint_t, uint64_t *);
62 static int vnet_m_start(void *);
63 static void vnet_m_stop(void *);
64 static int vnet_m_promisc(void *, boolean_t);
65 static int vnet_m_multicst(void *, boolean_t, const uint8_t *);
66 static int vnet_m_unicst(void *, const uint8_t *);
67 mblk_t *vnet_m_tx(void *, mblk_t *);
68 
69 /* vnet internal functions */
70 static int vnet_mac_register(vnet_t *);
71 static int vnet_read_mac_address(vnet_t *vnetp);
72 static void vnet_add_vptl(vnet_t *vnetp, vp_tl_t *vp_tlp);
73 static void vnet_del_vptl(vnet_t *vnetp, vp_tl_t *vp_tlp);
74 static vp_tl_t *vnet_get_vptl(vnet_t *vnetp, const char *devname);
75 static void vnet_fdb_alloc(vnet_t *vnetp);
76 static void vnet_fdb_free(vnet_t *vnetp);
77 static fdb_t *vnet_lookup_fdb(fdb_fanout_t *fdbhp, uint8_t *macaddr);
78 
79 /* exported functions */
80 void vnet_add_fdb(void *arg, uint8_t *macaddr, mac_tx_t m_tx, void *txarg);
81 void vnet_del_fdb(void *arg, uint8_t *macaddr);
82 void vnet_modify_fdb(void *arg, uint8_t *macaddr, mac_tx_t m_tx,
83 	void *txarg, boolean_t upgrade);
84 void vnet_add_def_rte(void *arg, mac_tx_t m_tx, void *txarg);
85 void vnet_del_def_rte(void *arg);
86 void vnet_rx(void *arg, mac_resource_handle_t mrh, mblk_t *mp);
87 void vnet_tx_update(void *arg);
88 
89 /* externs */
90 extern int vgen_init(void *vnetp, dev_info_t *vnetdip, const uint8_t *macaddr,
91 	mac_register_t **vgenmacp);
92 extern int vgen_uninit(void *arg);
93 
/*
 * MAC-layer entry points for a vnet instance.  A pointer to this table is
 * stored in m_callbacks by vnet_mac_register() before mac_register() is
 * called.  The initializer is positional; the slot notes below describe
 * what each entry does in this file.
 */
94 static mac_callbacks_t vnet_m_callbacks = {
95 	0,			/* presumably optional-callback flags - TODO confirm in sys/mac.h */
96 	vnet_m_stat,		/* sum a counter statistic over all transports */
97 	vnet_m_start,		/* start every proxy transport */
98 	vnet_m_stop,		/* stop every proxy transport */
99 	vnet_m_promisc,		/* promiscuous mode: no-op, reports success */
100 	vnet_m_multicst,	/* forward multicast add/remove to vgen */
101 	vnet_m_unicst,		/* set unicast address: always fails */
102 	vnet_m_tx,		/* transmit a chain, switching on dest mac */
103 	NULL,			/* remaining slots not provided by vnet */
104 	NULL,
105 	NULL
106 };
107 
108 /*
109  * Linked list of "vnet_t" structures - one per instance.
110  */
111 static vnet_t	*vnet_headp = NULL;
112 static krwlock_t vnet_rw;
113 
114 /* Tunables */
115 uint32_t vnet_ntxds = VNET_NTXDS;	/* power of 2 transmit descriptors */
116 uint32_t vnet_ldcwd_interval = VNET_LDCWD_INTERVAL; /* watchdog freq in msec */
117 uint32_t vnet_ldcwd_txtimeout = VNET_LDCWD_TXTIMEOUT;  /* tx timeout in msec */
118 uint32_t vnet_ldc_mtu = VNET_LDC_MTU;		/* ldc mtu */
119 uint32_t vnet_nfdb_hash = VNET_NFDB_HASH;	/* size of fdb hash table */
120 
121 /*
122  * Property names
123  */
124 static char macaddr_propname[] = "local-mac-address";
125 
126 /*
127  * This is the string displayed by modinfo(1m).
128  */
129 static char vnet_ident[] = "vnet driver v%I%";
130 extern struct mod_ops mod_driverops;
131 static struct cb_ops cb_vnetops = {
132 	nulldev,		/* cb_open */
133 	nulldev,		/* cb_close */
134 	nodev,			/* cb_strategy */
135 	nodev,			/* cb_print */
136 	nodev,			/* cb_dump */
137 	nodev,			/* cb_read */
138 	nodev,			/* cb_write */
139 	nodev,			/* cb_ioctl */
140 	nodev,			/* cb_devmap */
141 	nodev,			/* cb_mmap */
142 	nodev,			/* cb_segmap */
143 	nochpoll,		/* cb_chpoll */
144 	ddi_prop_op,		/* cb_prop_op */
145 	NULL,			/* cb_stream */
146 	(int)(D_MP)		/* cb_flag */
147 };
148 
149 static struct dev_ops vnetops = {
150 	DEVO_REV,		/* devo_rev */
151 	0,			/* devo_refcnt */
152 	NULL,			/* devo_getinfo */
153 	nulldev,		/* devo_identify */
154 	nulldev,		/* devo_probe */
155 	vnetattach,		/* devo_attach */
156 	vnetdetach,		/* devo_detach */
157 	nodev,			/* devo_reset */
158 	&cb_vnetops,		/* devo_cb_ops */
159 	(struct bus_ops *)NULL	/* devo_bus_ops */
160 };
161 
162 static struct modldrv modldrv = {
163 	&mod_driverops,		/* Type of module.  This one is a driver */
164 	vnet_ident,		/* ID string */
165 	&vnetops		/* driver specific ops */
166 };
167 
168 static struct modlinkage modlinkage = {
169 	MODREV_1, (void *)&modldrv, NULL
170 };
171 
172 #ifdef DEBUG
173 
174 /*
175  * Print debug messages - set to 0xf to enable all msgs
176  */
177 int vnet_dbglevel = 0x8;
178 
179 static void
180 debug_printf(const char *fname, void *arg, const char *fmt, ...)
181 {
182 	char    buf[512];
183 	va_list ap;
184 	vnet_t *vnetp = (vnet_t *)arg;
185 	char    *bufp = buf;
186 
187 	if (vnetp == NULL) {
188 		(void) sprintf(bufp, "%s: ", fname);
189 		bufp += strlen(bufp);
190 	} else {
191 		(void) sprintf(bufp, "vnet%d:%s: ", vnetp->instance, fname);
192 		bufp += strlen(bufp);
193 	}
194 	va_start(ap, fmt);
195 	(void) vsprintf(bufp, fmt, ap);
196 	va_end(ap);
197 	cmn_err(CE_CONT, "%s\n", buf);
198 }
199 
200 #endif
201 
202 /* _init(9E): initialize the loadable module */
203 int
204 _init(void)
205 {
206 	int status;
207 
208 	DBG1(NULL, "enter\n");
209 
210 	mac_init_ops(&vnetops, "vnet");
211 	status = mod_install(&modlinkage);
212 	if (status != 0) {
213 		mac_fini_ops(&vnetops);
214 	}
215 
216 	DBG1(NULL, "exit(%d)\n", status);
217 	return (status);
218 }
219 
220 /* _fini(9E): prepare the module for unloading. */
221 int
222 _fini(void)
223 {
224 	int status;
225 
226 	DBG1(NULL, "enter\n");
227 
228 	status = mod_remove(&modlinkage);
229 	if (status != 0)
230 		return (status);
231 	mac_fini_ops(&vnetops);
232 
233 	DBG1(NULL, "exit(%d)\n", status);
234 	return (status);
235 }
236 
237 /* _info(9E): return information about the loadable module */
238 int
239 _info(struct modinfo *modinfop)
240 {
241 	return (mod_info(&modlinkage, modinfop));
242 }
243 
244 /*
245  * attach(9E): attach a device to the system.
246  * called once for each instance of the device on the system.
247  */
248 static int
249 vnetattach(dev_info_t *dip, ddi_attach_cmd_t cmd)
250 {
251 	vnet_t		*vnetp;
252 	vp_tl_t		*vp_tlp;
253 	int		instance;
254 	int		status;
255 	mac_register_t	*vgenmacp = NULL;
256 	enum	{ AST_init = 0x0, AST_vnet_alloc = 0x1,
257 		AST_mac_alloc = 0x2, AST_read_macaddr = 0x4,
258 		AST_vgen_init = 0x8, AST_vptl_alloc = 0x10,
259 		AST_fdbh_alloc = 0x20 } attach_state;
260 
261 	attach_state = AST_init;
262 
263 	switch (cmd) {
264 	case DDI_ATTACH:
265 		break;
266 	case DDI_RESUME:
267 	case DDI_PM_RESUME:
268 	default:
269 		goto vnet_attach_fail;
270 	}
271 
272 	instance = ddi_get_instance(dip);
273 	DBG1(NULL, "instance(%d) enter\n", instance);
274 
275 	/* allocate vnet_t and mac_t structures */
276 	vnetp = kmem_zalloc(sizeof (vnet_t), KM_SLEEP);
277 	attach_state |= AST_vnet_alloc;
278 
279 	/* setup links to vnet_t from both devinfo and mac_t */
280 	ddi_set_driver_private(dip, (caddr_t)vnetp);
281 	vnetp->dip = dip;
282 	vnetp->instance = instance;
283 
284 	/* read the mac address */
285 	status = vnet_read_mac_address(vnetp);
286 	if (status != DDI_SUCCESS) {
287 		goto vnet_attach_fail;
288 	}
289 	attach_state |= AST_read_macaddr;
290 
291 	/*
292 	 * Initialize the generic vnet proxy transport. This is the first
293 	 * and default transport used by vnet. The generic transport
294 	 * is provided by using sun4v LDC (logical domain channel). On success,
295 	 * vgen_init() provides a pointer to mac_t of generic transport.
296 	 * Currently, this generic layer provides network connectivity to other
297 	 * vnets within ldoms and also to remote hosts oustide ldoms through
298 	 * the virtual switch (vsw) device on domain0. In the future, when
299 	 * physical adapters that are able to share their resources (such as
300 	 * dma channels) with guest domains become available, the vnet device
301 	 * will use hardware specific driver to communicate directly over the
302 	 * physical device to reach remote hosts without going through vswitch.
303 	 */
304 	status = vgen_init(vnetp, vnetp->dip, (uint8_t *)vnetp->curr_macaddr,
305 	    &vgenmacp);
306 	if (status != DDI_SUCCESS) {
307 		DERR(vnetp, "vgen_init() failed\n");
308 		goto vnet_attach_fail;
309 	}
310 	rw_init(&vnetp->trwlock, NULL, RW_DRIVER, NULL);
311 	attach_state |= AST_vgen_init;
312 
313 	vp_tlp = kmem_zalloc(sizeof (vp_tl_t), KM_SLEEP);
314 	vp_tlp->macp = vgenmacp;
315 	(void) snprintf(vp_tlp->name, MAXNAMELEN, "%s%u", "vgen", instance);
316 	(void) strcpy(vnetp->vgen_name, vp_tlp->name);
317 
318 	/* add generic transport to the list of vnet proxy transports */
319 	vnet_add_vptl(vnetp, vp_tlp);
320 	attach_state |= AST_vptl_alloc;
321 
322 	vnet_fdb_alloc(vnetp);
323 	attach_state |= AST_fdbh_alloc;
324 
325 	/* register with MAC layer */
326 	status = vnet_mac_register(vnetp);
327 	if (status != DDI_SUCCESS) {
328 		goto vnet_attach_fail;
329 	}
330 
331 	/* add to the list of vnet devices */
332 	WRITE_ENTER(&vnet_rw);
333 	vnetp->nextp = vnet_headp;
334 	vnet_headp = vnetp;
335 	RW_EXIT(&vnet_rw);
336 
337 	DBG1(NULL, "instance(%d) exit\n", instance);
338 	return (DDI_SUCCESS);
339 
340 vnet_attach_fail:
341 	if (attach_state & AST_fdbh_alloc) {
342 		vnet_fdb_free(vnetp);
343 	}
344 	if (attach_state & AST_vptl_alloc) {
345 		WRITE_ENTER(&vnetp->trwlock);
346 		vnet_del_vptl(vnetp, vp_tlp);
347 		RW_EXIT(&vnetp->trwlock);
348 	}
349 	if (attach_state & AST_vgen_init) {
350 		(void) vgen_uninit(vgenmacp->m_driver);
351 		rw_destroy(&vnetp->trwlock);
352 	}
353 	if (attach_state & AST_vnet_alloc) {
354 		KMEM_FREE(vnetp);
355 	}
356 	return (DDI_FAILURE);
357 }
358 
359 /*
360  * detach(9E): detach a device from the system.
361  */
362 static int
363 vnetdetach(dev_info_t *dip, ddi_detach_cmd_t cmd)
364 {
365 	vnet_t		*vnetp;
366 	vnet_t		**vnetpp;
367 	vp_tl_t		*vp_tlp;
368 	int		instance;
369 	int		rv;
370 
371 	instance = ddi_get_instance(dip);
372 	DBG1(NULL, "instance(%d) enter\n", instance);
373 
374 	vnetp = ddi_get_driver_private(dip);
375 	if (vnetp == NULL) {
376 		goto vnet_detach_fail;
377 	}
378 
379 	switch (cmd) {
380 	case DDI_DETACH:
381 		break;
382 	case DDI_SUSPEND:
383 	case DDI_PM_SUSPEND:
384 	default:
385 		goto vnet_detach_fail;
386 	}
387 
388 	/* uninit and free vnet proxy transports */
389 	WRITE_ENTER(&vnetp->trwlock);
390 	while ((vp_tlp = vnetp->tlp) != NULL) {
391 		if (strcmp(vnetp->vgen_name, vp_tlp->name) == 0) {
392 			/* uninitialize generic transport */
393 			rv = vgen_uninit(vp_tlp->macp->m_driver);
394 			if (rv != DDI_SUCCESS) {
395 				RW_EXIT(&vnetp->trwlock);
396 				goto vnet_detach_fail;
397 			}
398 		}
399 		vnet_del_vptl(vnetp, vp_tlp);
400 	}
401 	RW_EXIT(&vnetp->trwlock);
402 
403 	/*
404 	 * Unregister from the MAC subsystem.  This can fail, in
405 	 * particular if there are DLPI style-2 streams still open -
406 	 * in which case we just return failure.
407 	 */
408 	if (mac_unregister(vnetp->mh) != 0)
409 		goto vnet_detach_fail;
410 
411 	/* unlink from instance(vnet_t) list */
412 	WRITE_ENTER(&vnet_rw);
413 	for (vnetpp = &vnet_headp; *vnetpp; vnetpp = &(*vnetpp)->nextp) {
414 		if (*vnetpp == vnetp) {
415 			*vnetpp = vnetp->nextp;
416 			break;
417 		}
418 	}
419 	RW_EXIT(&vnet_rw);
420 
421 	vnet_fdb_free(vnetp);
422 
423 	rw_destroy(&vnetp->trwlock);
424 	KMEM_FREE(vnetp);
425 
426 	return (DDI_SUCCESS);
427 
428 vnet_detach_fail:
429 	return (DDI_FAILURE);
430 }
431 
432 /* enable the device for transmit/receive */
433 static int
434 vnet_m_start(void *arg)
435 {
436 	vnet_t		*vnetp = arg;
437 	vp_tl_t		*vp_tlp;
438 	mac_register_t	*vp_macp;
439 	mac_callbacks_t	*cbp;
440 
441 	DBG1(vnetp, "enter\n");
442 
443 	/*
444 	 * NOTE:
445 	 * Currently, we only have generic transport. m_start() invokes
446 	 * vgen_start() which enables ports/channels in vgen and
447 	 * initiates handshake with peer vnets and vsw. In the future when we
448 	 * have support for hardware specific transports, this information
449 	 * needs to be propagted back to vnet from vgen and we need to revisit
450 	 * this code (see comments in vnet_attach()).
451 	 *
452 	 */
453 	WRITE_ENTER(&vnetp->trwlock);
454 	for (vp_tlp = vnetp->tlp; vp_tlp != NULL; vp_tlp = vp_tlp->nextp) {
455 		vp_macp = vp_tlp->macp;
456 		cbp = vp_macp->m_callbacks;
457 		cbp->mc_start(vp_macp->m_driver);
458 	}
459 	RW_EXIT(&vnetp->trwlock);
460 
461 	DBG1(vnetp, "exit\n");
462 	return (VNET_SUCCESS);
463 
464 }
465 
466 /* stop transmit/receive for the device */
467 static void
468 vnet_m_stop(void *arg)
469 {
470 	vnet_t		*vnetp = arg;
471 	vp_tl_t		*vp_tlp;
472 	mac_register_t	*vp_macp;
473 	mac_callbacks_t	*cbp;
474 
475 	DBG1(vnetp, "enter\n");
476 
477 	WRITE_ENTER(&vnetp->trwlock);
478 	for (vp_tlp = vnetp->tlp; vp_tlp != NULL; vp_tlp = vp_tlp->nextp) {
479 		vp_macp = vp_tlp->macp;
480 		cbp = vp_macp->m_callbacks;
481 		cbp->mc_stop(vp_macp->m_driver);
482 	}
483 	RW_EXIT(&vnetp->trwlock);
484 
485 	DBG1(vnetp, "exit\n");
486 }
487 
488 /* set the unicast mac address of the device */
489 static int
490 vnet_m_unicst(void *arg, const uint8_t *macaddr)
491 {
492 	_NOTE(ARGUNUSED(macaddr))
493 
494 	vnet_t *vnetp = arg;
495 
496 	DBG1(vnetp, "enter\n");
497 	/*
498 	 * NOTE: setting mac address dynamically is not supported.
499 	 */
500 	DBG1(vnetp, "exit\n");
501 
502 	return (VNET_FAILURE);
503 }
504 
505 /* enable/disable a multicast address */
506 static int
507 vnet_m_multicst(void *arg, boolean_t add, const uint8_t *mca)
508 {
509 	_NOTE(ARGUNUSED(add, mca))
510 
511 	vnet_t *vnetp = arg;
512 	vp_tl_t		*vp_tlp;
513 	mac_register_t	*vp_macp;
514 	mac_callbacks_t	*cbp;
515 	int rv = VNET_SUCCESS;
516 
517 	DBG1(vnetp, "enter\n");
518 	READ_ENTER(&vnetp->trwlock);
519 	for (vp_tlp = vnetp->tlp; vp_tlp != NULL; vp_tlp = vp_tlp->nextp) {
520 		if (strcmp(vnetp->vgen_name, vp_tlp->name) == 0) {
521 			vp_macp = vp_tlp->macp;
522 			cbp = vp_macp->m_callbacks;
523 			rv = cbp->mc_multicst(vp_macp->m_driver, add, mca);
524 			break;
525 		}
526 	}
527 	RW_EXIT(&vnetp->trwlock);
528 	DBG1(vnetp, "exit(%d)\n", rv);
529 	return (rv);
530 }
531 
532 /* set or clear promiscuous mode on the device */
533 static int
534 vnet_m_promisc(void *arg, boolean_t on)
535 {
536 	_NOTE(ARGUNUSED(on))
537 
538 	vnet_t *vnetp = arg;
539 	DBG1(vnetp, "enter\n");
540 	/*
541 	 * NOTE: setting promiscuous mode is not supported, just return success.
542 	 */
543 	DBG1(vnetp, "exit\n");
544 	return (VNET_SUCCESS);
545 }
546 
547 /*
548  * Transmit a chain of packets. This function provides switching functionality
549  * based on the destination mac address to reach other guests (within ldoms) or
550  * external hosts.
551  */
552 mblk_t *
553 vnet_m_tx(void *arg, mblk_t *mp)
554 {
555 	vnet_t *vnetp;
556 	mblk_t *next;
557 	uint32_t fdbhash;
558 	fdb_t *fdbp;
559 	fdb_fanout_t *fdbhp;
560 	struct ether_header *ehp;
561 	uint8_t *macaddr;
562 	mblk_t *resid_mp;
563 
564 	vnetp = (vnet_t *)arg;
565 	DBG1(vnetp, "enter\n");
566 	ASSERT(mp != NULL);
567 
568 	while (mp != NULL) {
569 		next = mp->b_next;
570 		mp->b_next = NULL;
571 
572 		/* get the destination mac address in the eth header */
573 		ehp = (struct ether_header *)mp->b_rptr;
574 		macaddr = (uint8_t *)&ehp->ether_dhost;
575 
576 		/* Calculate hash value and fdb fanout */
577 		fdbhash = MACHASH(macaddr, vnetp->nfdb_hash);
578 		fdbhp = &(vnetp->fdbhp[fdbhash]);
579 
580 		READ_ENTER(&fdbhp->rwlock);
581 		fdbp = vnet_lookup_fdb(fdbhp, macaddr);
582 		if (fdbp) {
583 			/*
584 			 * If the destination is in FDB, the destination is
585 			 * a vnet device within ldoms and directly reachable,
586 			 * invoke the tx function in the fdb entry.
587 			 */
588 			resid_mp = fdbp->m_tx(fdbp->txarg, mp);
589 			if (resid_mp != NULL) {
590 				/* m_tx failed */
591 				mp->b_next = next;
592 				RW_EXIT(&fdbhp->rwlock);
593 				break;
594 			}
595 			RW_EXIT(&fdbhp->rwlock);
596 		} else {
597 			/* destination is not in FDB */
598 			RW_EXIT(&fdbhp->rwlock);
599 			/*
600 			 * If the destination is broadcast/multicast
601 			 * or an unknown unicast address, forward the
602 			 * packet to vsw, using the last slot in fdb which is
603 			 * reserved for default route.
604 			 */
605 			fdbhp = &(vnetp->fdbhp[vnetp->nfdb_hash]);
606 			READ_ENTER(&fdbhp->rwlock);
607 			fdbp = fdbhp->headp;
608 			if (fdbp) {
609 				resid_mp = fdbp->m_tx(fdbp->txarg, mp);
610 				if (resid_mp != NULL) {
611 					/* m_tx failed */
612 					mp->b_next = next;
613 					RW_EXIT(&fdbhp->rwlock);
614 					break;
615 				}
616 			} else {
617 				/* drop the packet */
618 				freemsg(mp);
619 			}
620 			RW_EXIT(&fdbhp->rwlock);
621 		}
622 
623 		mp = next;
624 	}
625 
626 	DBG1(vnetp, "exit\n");
627 	return (mp);
628 }
629 
630 /* get statistics from the device */
631 int
632 vnet_m_stat(void *arg, uint_t stat, uint64_t *val)
633 {
634 	vnet_t *vnetp = arg;
635 	vp_tl_t	*vp_tlp;
636 	mac_register_t	*vp_macp;
637 	mac_callbacks_t	*cbp;
638 	uint64_t val_total = 0;
639 
640 	DBG1(vnetp, "enter\n");
641 
642 	/*
643 	 * get the specified statistic from each transport and return the
644 	 * aggregate val.  This obviously only works for counters.
645 	 */
646 	if ((IS_MAC_STAT(stat) && !MAC_STAT_ISACOUNTER(stat)) ||
647 	    (IS_MACTYPE_STAT(stat) && !ETHER_STAT_ISACOUNTER(stat))) {
648 		return (ENOTSUP);
649 	}
650 	READ_ENTER(&vnetp->trwlock);
651 	for (vp_tlp = vnetp->tlp; vp_tlp != NULL; vp_tlp = vp_tlp->nextp) {
652 		vp_macp = vp_tlp->macp;
653 		cbp = vp_macp->m_callbacks;
654 		if (cbp->mc_getstat(vp_macp->m_driver, stat, val) == 0)
655 			val_total += *val;
656 	}
657 	RW_EXIT(&vnetp->trwlock);
658 
659 	*val = val_total;
660 
661 	DBG1(vnetp, "exit\n");
662 	return (0);
663 }
664 
665 /* wrapper function for mac_register() */
666 static int
667 vnet_mac_register(vnet_t *vnetp)
668 {
669 	mac_register_t	*macp;
670 	int		err;
671 
672 	if ((macp = mac_alloc(MAC_VERSION)) == NULL)
673 		return (DDI_FAILURE);
674 	macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
675 	macp->m_driver = vnetp;
676 	macp->m_dip = vnetp->dip;
677 	macp->m_src_addr = vnetp->curr_macaddr;
678 	macp->m_callbacks = &vnet_m_callbacks;
679 	macp->m_min_sdu = 0;
680 	macp->m_max_sdu = ETHERMTU;
681 
682 	/*
683 	 * Finally, we're ready to register ourselves with the MAC layer
684 	 * interface; if this succeeds, we're all ready to start()
685 	 */
686 	err = mac_register(macp, &vnetp->mh);
687 	mac_free(macp);
688 	return (err == 0 ? DDI_SUCCESS : DDI_FAILURE);
689 }
690 
691 /* add vp_tl to the list */
692 static void
693 vnet_add_vptl(vnet_t *vnetp, vp_tl_t *vp_tlp)
694 {
695 	vp_tl_t *ttlp;
696 
697 	WRITE_ENTER(&vnetp->trwlock);
698 	if (vnetp->tlp == NULL) {
699 		vnetp->tlp = vp_tlp;
700 	} else {
701 		ttlp = vnetp->tlp;
702 		while (ttlp->nextp)
703 			ttlp = ttlp->nextp;
704 		ttlp->nextp = vp_tlp;
705 	}
706 	RW_EXIT(&vnetp->trwlock);
707 }
708 
709 /* remove vp_tl from the list */
710 static void
711 vnet_del_vptl(vnet_t *vnetp, vp_tl_t *vp_tlp)
712 {
713 	vp_tl_t *ttlp, **pretlp;
714 	boolean_t found = B_FALSE;
715 
716 	pretlp = &vnetp->tlp;
717 	ttlp = *pretlp;
718 	while (ttlp) {
719 		if (ttlp == vp_tlp) {
720 			found = B_TRUE;
721 			(*pretlp) = ttlp->nextp;
722 			ttlp->nextp = NULL;
723 			break;
724 		}
725 		pretlp = &(ttlp->nextp);
726 		ttlp = *pretlp;
727 	}
728 
729 	if (found) {
730 		KMEM_FREE(vp_tlp);
731 	}
732 }
733 
734 /* get vp_tl corresponding to the given name */
735 static vp_tl_t *
736 vnet_get_vptl(vnet_t *vnetp, const char *name)
737 {
738 	vp_tl_t *tlp;
739 
740 	tlp = vnetp->tlp;
741 	while (tlp) {
742 		if (strcmp(tlp->name, name) == 0) {
743 			return (tlp);
744 		}
745 		tlp = tlp->nextp;
746 	}
747 	DWARN(vnetp, "can't find vp_tl with name (%s)\n", name);
748 	return (NULL);
749 }
750 
751 /* read the mac address of the device */
752 static int
753 vnet_read_mac_address(vnet_t *vnetp)
754 {
755 	uchar_t 	*macaddr;
756 	uint32_t 	size;
757 	int 		rv;
758 
759 	rv = ddi_prop_lookup_byte_array(DDI_DEV_T_ANY, vnetp->dip,
760 	    DDI_PROP_DONTPASS, macaddr_propname, &macaddr, &size);
761 	if ((rv != DDI_PROP_SUCCESS) || (size != ETHERADDRL)) {
762 		DWARN(vnetp, "prop_lookup failed(%s) err(%d)\n",
763 		    macaddr_propname, rv);
764 		return (DDI_FAILURE);
765 	}
766 	bcopy(macaddr, (caddr_t)vnetp->vendor_addr, ETHERADDRL);
767 	bcopy(macaddr, (caddr_t)vnetp->curr_macaddr, ETHERADDRL);
768 	ddi_prop_free(macaddr);
769 
770 	return (DDI_SUCCESS);
771 }
772 
773 
774 /*
775  * Functions below are called only by generic transport to add/remove/modify
776  * entries in forwarding database. See comments in vgen_port_init(vnet_gen.c).
777  */
778 
779 /* add an entry into the forwarding database */
780 void
781 vnet_add_fdb(void *arg, uint8_t *macaddr, mac_tx_t m_tx, void *txarg)
782 {
783 	vnet_t *vnetp = (vnet_t *)arg;
784 	uint32_t fdbhash;
785 	fdb_t *fdbp;
786 	fdb_fanout_t *fdbhp;
787 
788 	/* Calculate hash value and fdb fanout */
789 	fdbhash = MACHASH(macaddr, vnetp->nfdb_hash);
790 	fdbhp = &(vnetp->fdbhp[fdbhash]);
791 
792 	WRITE_ENTER(&fdbhp->rwlock);
793 
794 	fdbp = kmem_zalloc(sizeof (fdb_t), KM_NOSLEEP);
795 	if (fdbp == NULL) {
796 		RW_EXIT(&fdbhp->rwlock);
797 		return;
798 	}
799 	bcopy(macaddr, (caddr_t)fdbp->macaddr, ETHERADDRL);
800 	fdbp->m_tx = m_tx;
801 	fdbp->txarg = txarg;
802 	fdbp->nextp = fdbhp->headp;
803 	fdbhp->headp = fdbp;
804 
805 	RW_EXIT(&fdbhp->rwlock);
806 }
807 
808 /* delete an entry from the forwarding database */
809 void
810 vnet_del_fdb(void *arg, uint8_t *macaddr)
811 {
812 	vnet_t *vnetp = (vnet_t *)arg;
813 	uint32_t fdbhash;
814 	fdb_t *fdbp;
815 	fdb_t **pfdbp;
816 	fdb_fanout_t *fdbhp;
817 
818 	/* Calculate hash value and fdb fanout */
819 	fdbhash = MACHASH(macaddr, vnetp->nfdb_hash);
820 	fdbhp = &(vnetp->fdbhp[fdbhash]);
821 
822 	WRITE_ENTER(&fdbhp->rwlock);
823 
824 	for (pfdbp = &fdbhp->headp; (fdbp  = *pfdbp) != NULL;
825 	    pfdbp = &fdbp->nextp) {
826 		if (bcmp(fdbp->macaddr, macaddr, ETHERADDRL) == 0) {
827 			/* Unlink it from the list */
828 			*pfdbp = fdbp->nextp;
829 			KMEM_FREE(fdbp);
830 			break;
831 		}
832 	}
833 
834 	RW_EXIT(&fdbhp->rwlock);
835 }
836 
837 /* modify an existing entry in the forwarding database */
838 void
839 vnet_modify_fdb(void *arg, uint8_t *macaddr, mac_tx_t m_tx, void *txarg,
840 	boolean_t upgrade)
841 {
842 	vnet_t *vnetp = (vnet_t *)arg;
843 	uint32_t fdbhash;
844 	fdb_t *fdbp;
845 	fdb_fanout_t *fdbhp;
846 
847 	/* Calculate hash value and fdb fanout */
848 	fdbhash = MACHASH(macaddr, vnetp->nfdb_hash);
849 	fdbhp = &(vnetp->fdbhp[fdbhash]);
850 
851 	if (upgrade == B_TRUE) {
852 		/*
853 		 * Caller already holds the lock as a reader. This can
854 		 * occur if this function is invoked in the context
855 		 * of transmit routine - vnet_m_tx(), where the lock
856 		 * is held as a reader before calling the transmit
857 		 * function of an fdb entry (fdbp->m_tx).
858 		 * See comments in vgen_ldcsend() in vnet_gen.c
859 		 */
860 		if (!rw_tryupgrade(&fdbhp->rwlock)) {
861 			RW_EXIT(&fdbhp->rwlock);
862 			WRITE_ENTER(&fdbhp->rwlock);
863 		}
864 	} else {
865 		/* Caller does not hold the lock */
866 		WRITE_ENTER(&fdbhp->rwlock);
867 	}
868 
869 	for (fdbp = fdbhp->headp; fdbp != NULL; fdbp = fdbp->nextp) {
870 		if (bcmp(fdbp->macaddr, macaddr, ETHERADDRL) == 0) {
871 			/* change the entry to have new tx params */
872 			fdbp->m_tx = m_tx;
873 			fdbp->txarg = txarg;
874 			break;
875 		}
876 	}
877 
878 	if (upgrade == B_TRUE) {
879 		/* restore the caller as a reader */
880 		rw_downgrade(&fdbhp->rwlock);
881 	} else {
882 		RW_EXIT(&fdbhp->rwlock);
883 	}
884 }
885 
886 /* allocate the forwarding database */
887 static void
888 vnet_fdb_alloc(vnet_t *vnetp)
889 {
890 	int		i;
891 	uint32_t	nfdbh = 0;
892 
893 	nfdbh = vnet_nfdb_hash;
894 	if ((nfdbh < VNET_NFDB_HASH) || (nfdbh > VNET_NFDB_HASH_MAX)) {
895 		vnetp->nfdb_hash = VNET_NFDB_HASH;
896 	} else {
897 		vnetp->nfdb_hash = nfdbh;
898 	}
899 
900 	/* allocate fdb hash table, with an extra slot for default route */
901 	vnetp->fdbhp = kmem_zalloc(sizeof (fdb_fanout_t) *
902 	    (vnetp->nfdb_hash + 1), KM_SLEEP);
903 
904 	for (i = 0; i <= vnetp->nfdb_hash; i++) {
905 		rw_init(&vnetp->fdbhp[i].rwlock, NULL, RW_DRIVER, NULL);
906 	}
907 }
908 
909 /* free the forwarding database */
910 static void
911 vnet_fdb_free(vnet_t *vnetp)
912 {
913 	int i;
914 
915 	for (i = 0; i <= vnetp->nfdb_hash; i++) {
916 		rw_destroy(&vnetp->fdbhp[i].rwlock);
917 	}
918 
919 	/*
920 	 * deallocate fdb hash table, including an extra slot for default
921 	 * route.
922 	 */
923 	kmem_free(vnetp->fdbhp, sizeof (fdb_fanout_t) * (vnetp->nfdb_hash + 1));
924 	vnetp->fdbhp = NULL;
925 }
926 
927 /* look up an fdb entry based on the mac address, caller holds lock */
928 static fdb_t *
929 vnet_lookup_fdb(fdb_fanout_t *fdbhp, uint8_t *macaddr)
930 {
931 	fdb_t *fdbp = NULL;
932 
933 	for (fdbp = fdbhp->headp; fdbp != NULL; fdbp = fdbp->nextp) {
934 		if (bcmp(fdbp->macaddr, macaddr, ETHERADDRL) == 0) {
935 			break;
936 		}
937 	}
938 
939 	return (fdbp);
940 }
941 
942 /* add default route entry into the forwarding database */
943 void
944 vnet_add_def_rte(void *arg, mac_tx_t m_tx, void *txarg)
945 {
946 	vnet_t *vnetp = (vnet_t *)arg;
947 	fdb_t *fdbp;
948 	fdb_fanout_t *fdbhp;
949 
950 	/*
951 	 * The last hash list is reserved for default route entry,
952 	 * and for now, we have only one entry in this list.
953 	 */
954 	fdbhp = &(vnetp->fdbhp[vnetp->nfdb_hash]);
955 
956 	WRITE_ENTER(&fdbhp->rwlock);
957 
958 	if (fdbhp->headp) {
959 		DWARN(vnetp, "default rte already exists\n");
960 		RW_EXIT(&fdbhp->rwlock);
961 		return;
962 	}
963 	fdbp = kmem_zalloc(sizeof (fdb_t), KM_NOSLEEP);
964 	if (fdbp == NULL) {
965 		RW_EXIT(&fdbhp->rwlock);
966 		return;
967 	}
968 	bzero(fdbp->macaddr, ETHERADDRL);
969 	fdbp->m_tx = m_tx;
970 	fdbp->txarg = txarg;
971 	fdbp->nextp = NULL;
972 	fdbhp->headp = fdbp;
973 
974 	RW_EXIT(&fdbhp->rwlock);
975 }
976 
977 /* delete default route entry from the forwarding database */
978 void
979 vnet_del_def_rte(void *arg)
980 {
981 	vnet_t *vnetp = (vnet_t *)arg;
982 	fdb_t *fdbp;
983 	fdb_fanout_t *fdbhp;
984 
985 	/*
986 	 * The last hash list is reserved for default route entry,
987 	 * and for now, we have only one entry in this list.
988 	 */
989 	fdbhp = &(vnetp->fdbhp[vnetp->nfdb_hash]);
990 
991 	WRITE_ENTER(&fdbhp->rwlock);
992 
993 	if (fdbhp->headp == NULL) {
994 		RW_EXIT(&fdbhp->rwlock);
995 		return;
996 	}
997 	fdbp = fdbhp->headp;
998 	KMEM_FREE(fdbp);
999 	fdbhp->headp = NULL;
1000 
1001 	RW_EXIT(&fdbhp->rwlock);
1002 }
1003 
1004 void
1005 vnet_rx(void *arg, mac_resource_handle_t mrh, mblk_t *mp)
1006 {
1007 	vnet_t *vnetp = arg;
1008 	mac_rx(vnetp->mh, mrh, mp);
1009 }
1010 
1011 void
1012 vnet_tx_update(void *arg)
1013 {
1014 	vnet_t *vnetp = arg;
1015 	mac_tx_update(vnetp->mh);
1016 }
1017