xref: /illumos-gate/usr/src/uts/sun4v/io/vnet.c (revision 560f878bce5cdf0661659001415019ca5c8a01b4)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <sys/types.h>
30 #include <sys/errno.h>
31 #include <sys/param.h>
32 #include <sys/stream.h>
33 #include <sys/kmem.h>
34 #include <sys/conf.h>
35 #include <sys/devops.h>
36 #include <sys/ksynch.h>
37 #include <sys/stat.h>
38 #include <sys/modctl.h>
39 #include <sys/debug.h>
40 #include <sys/ethernet.h>
41 #include <sys/dlpi.h>
42 #include <net/if.h>
43 #include <sys/mac.h>
44 #include <sys/ddi.h>
45 #include <sys/sunddi.h>
46 #include <sys/strsun.h>
47 #include <sys/note.h>
48 #include <sys/vnet.h>
49 
50 /*
51  * Function prototypes.
52  */
53 
54 /* DDI entrypoints */
55 static int vnetdevinfo(dev_info_t *, ddi_info_cmd_t, void *, void **);
56 static int vnetattach(dev_info_t *, ddi_attach_cmd_t);
57 static int vnetdetach(dev_info_t *, ddi_detach_cmd_t);
58 
59 /* MAC entrypoints  */
60 static uint64_t vnet_m_stat(void *arg, enum mac_stat stat);
61 static int vnet_m_start(void *);
62 static void vnet_m_stop(void *);
63 static int vnet_m_promisc(void *, boolean_t);
64 static int vnet_m_multicst(void *, boolean_t, const uint8_t *);
65 static int vnet_m_unicst(void *, const uint8_t *);
66 static void vnet_m_resources(void *);
67 static void vnet_m_ioctl(void *, queue_t *, mblk_t *);
68 mblk_t *vnet_m_tx(void *, mblk_t *);
69 
70 /* vnet internal functions */
71 static int vnet_mac_register(vnet_t *);
72 static int vnet_read_mac_address(vnet_t *vnetp);
73 static void vnet_add_vptl(vnet_t *vnetp, vp_tl_t *vp_tlp);
74 static void vnet_del_vptl(vnet_t *vnetp, vp_tl_t *vp_tlp);
75 static vp_tl_t *vnet_get_vptl(vnet_t *vnetp, const char *devname);
76 static fdb_t *vnet_lookup_fdb(fdb_fanout_t *fdbhp, uint8_t *macaddr);
77 
78 /* exported functions */
79 void vnet_add_fdb(void *arg, uint8_t *macaddr, mac_tx_t m_tx, void *txarg);
80 void vnet_del_fdb(void *arg, uint8_t *macaddr);
81 void vnet_modify_fdb(void *arg, uint8_t *macaddr, mac_tx_t m_tx, void *txarg);
82 void vnet_add_def_rte(void *arg, mac_tx_t m_tx, void *txarg);
83 void vnet_del_def_rte(void *arg);
84 
85 /* externs */
86 extern int vgen_init(void *vnetp, dev_info_t *vnetdip, void *vnetmacp,
87 	const uint8_t *macaddr, mac_t **vgenmacp);
88 extern void vgen_uninit(void *arg);
89 
90 /*
91  * Linked list of "vnet_t" structures - one per instance.
92  */
93 static vnet_t	*vnet_headp = NULL;
94 static krwlock_t vnet_rw;
95 
96 /* Tunables */
97 uint32_t vnet_ntxds = VNET_NTXDS;	/* power of 2 transmit descriptors */
98 uint32_t vnet_reclaim_lowat = VNET_RECLAIM_LOWAT;  /* tx recl low watermark */
99 uint32_t vnet_reclaim_hiwat = VNET_RECLAIM_HIWAT;  /* tx recl high watermark */
100 uint32_t vnet_ldcwd_interval = VNET_LDCWD_INTERVAL; /* watchdog freq in msec */
101 uint32_t vnet_ldcwd_txtimeout = VNET_LDCWD_TXTIMEOUT;  /* tx timeout in msec */
102 uint32_t vnet_ldc_qlen = VNET_LDC_QLEN;		/* ldc qlen */
103 uint32_t vnet_nfdb_hash = VNET_NFDB_HASH;	/* size of fdb hash table */
104 
105 /*
106  * Property names
107  */
108 static char macaddr_propname[] = "local-mac-address";
109 
110 static struct ether_addr etherbroadcastaddr = {
111 	0xff, 0xff, 0xff, 0xff, 0xff, 0xff
112 };
113 
114 /*
115  * MIB II broadcast/multicast packets
116  */
117 #define	IS_BROADCAST(ehp) \
118 		(ether_cmp(&ehp->ether_dhost, &etherbroadcastaddr) == 0)
119 #define	IS_MULTICAST(ehp) \
120 		((ehp->ether_dhost.ether_addr_octet[0] & 01) == 1)
121 
/*
 * This is the string displayed by modinfo(1m).
 */
static char vnet_ident[] = "vnet driver v%I%";
extern struct mod_ops mod_driverops;

/*
 * Character/block entry points. open and close are no-ops (nulldev);
 * every other entry point is rejected with nodev, and no STREAMS table
 * is supplied (cb_stream is NULL).
 */
static struct cb_ops cb_vnetops = {
	nulldev,		/* cb_open */
	nulldev,		/* cb_close */
	nodev,			/* cb_strategy */
	nodev,			/* cb_print */
	nodev,			/* cb_dump */
	nodev,			/* cb_read */
	nodev,			/* cb_write */
	nodev,			/* cb_ioctl */
	nodev,			/* cb_devmap */
	nodev,			/* cb_mmap */
	nodev,			/* cb_segmap */
	nochpoll,		/* cb_chpoll */
	ddi_prop_op,		/* cb_prop_op */
	NULL,			/* cb_stream */
	(int)(D_MP)		/* cb_flag */
};

/*
 * Device operations. Only attach and detach are implemented by this file;
 * devo_getinfo is left NULL here and this structure is handed to
 * mac_init_ops() in _init() before the module is installed.
 */
static struct dev_ops vnetops = {
	DEVO_REV,		/* devo_rev */
	0,			/* devo_refcnt */
	NULL,			/* devo_getinfo */
	nulldev,		/* devo_identify */
	nulldev,		/* devo_probe */
	vnetattach,		/* devo_attach */
	vnetdetach,		/* devo_detach */
	nodev,			/* devo_reset */
	&cb_vnetops,		/* devo_cb_ops */
	(struct bus_ops *)NULL	/* devo_bus_ops */
};

/* Module linkage element describing this module as a device driver. */
static struct modldrv modldrv = {
	&mod_driverops,		/* Type of module.  This one is a driver */
	vnet_ident,		/* ID string */
	&vnetops		/* driver specific ops */
};

/* Linkage passed to mod_install(), mod_remove() and mod_info(). */
static struct modlinkage modlinkage = {
	MODREV_1, (void *)&modldrv, NULL
};
167 
168 
169 /*
170  * Print debug messages - set to 0xf to enable all msgs
171  */
172 int _vnet_dbglevel = 0x8;
173 
174 void
175 _vnetdebug_printf(void *arg, const char *fmt, ...)
176 {
177 	char    buf[512];
178 	va_list ap;
179 	vnet_t *vnetp = (vnet_t *)arg;
180 
181 	va_start(ap, fmt);
182 	(void) vsprintf(buf, fmt, ap);
183 	va_end(ap);
184 
185 	if (vnetp == NULL)
186 		cmn_err(CE_CONT, "%s\n", buf);
187 	else
188 		cmn_err(CE_CONT, "vnet%d: %s\n", vnetp->instance, buf);
189 }
190 
#ifdef DEBUG

/*
 * XXX: any changes to the definitions below need corresponding changes in
 * vnet_gen.c
 */

/*
 * debug levels:
 * DBG_LEVEL1:	Function entry/exit tracing
 * DBG_LEVEL2:	Info messages
 * DBG_LEVEL3:	Warning messages
 * DBG_LEVEL4:	Error messages
 */

enum	{ DBG_LEVEL1 = 0x01, DBG_LEVEL2 = 0x02, DBG_LEVEL3 = 0x04,
	    DBG_LEVEL4 = 0x08 };

/*
 * Each macro takes a fully parenthesized argument list for
 * _vnetdebug_printf(), e.g. DBG1((vnetp, "fmt %d\n", val)), and prints
 * only when the matching bit is set in _vnet_dbglevel.
 */
#define	DBG1(_s)	do {						\
			    if ((_vnet_dbglevel & DBG_LEVEL1) != 0) {	\
					_vnetdebug_printf _s;		\
			    }					\
			_NOTE(CONSTCOND) } while (0)

#define	DBG2(_s)	do {						\
			    if ((_vnet_dbglevel & DBG_LEVEL2) != 0) {	\
					_vnetdebug_printf _s;		\
			    }					\
			_NOTE(CONSTCOND) } while (0)

#define	DWARN(_s)	do {						\
			    if ((_vnet_dbglevel & DBG_LEVEL3) != 0) {	\
					_vnetdebug_printf _s;		\
			    }					\
			_NOTE(CONSTCOND) } while (0)

#define	DERR(_s)	do {						\
			    if ((_vnet_dbglevel & DBG_LEVEL4) != 0) {	\
					_vnetdebug_printf _s;		\
			    }					\
			_NOTE(CONSTCOND) } while (0)

#else

/*
 * Non-DEBUG build: the call sits in a branch that is never taken, so the
 * arguments are still seen by the compiler but nothing is printed. The
 * do/while (0) wrapper makes each macro a single statement; a bare
 * "if (0) ..." would silently capture an "else" that follows the macro
 * in an unbraced if/else.
 */
#define	DBG1(_s)	do {						\
			    if (0) {					\
					_vnetdebug_printf _s;		\
			    }						\
			_NOTE(CONSTCOND) } while (0)

#define	DBG2(_s)	do {						\
			    if (0) {					\
					_vnetdebug_printf _s;		\
			    }						\
			_NOTE(CONSTCOND) } while (0)

#define	DWARN(_s)	do {						\
			    if (0) {					\
					_vnetdebug_printf _s;		\
			    }						\
			_NOTE(CONSTCOND) } while (0)

#define	DERR(_s)	do {						\
			    if (0) {					\
					_vnetdebug_printf _s;		\
			    }						\
			_NOTE(CONSTCOND) } while (0)

#endif
241 
242 /* _init(9E): initialize the loadable module */
243 int
244 _init(void)
245 {
246 	int status;
247 
248 	DBG1((NULL, "_init: enter\n"));
249 
250 	mac_init_ops(&vnetops, "vnet");
251 	status = mod_install(&modlinkage);
252 	if (status != 0) {
253 		mac_fini_ops(&vnetops);
254 	}
255 
256 	DBG1((NULL, "_init: exit\n"));
257 	return (status);
258 }
259 
260 /* _fini(9E): prepare the module for unloading. */
261 int
262 _fini(void)
263 {
264 	int status;
265 
266 	DBG1((NULL, "_fini: enter\n"));
267 
268 	status = mod_remove(&modlinkage);
269 	if (status != 0)
270 		return (status);
271 	mac_fini_ops(&vnetops);
272 
273 	DBG1((NULL, "_fini: exit\n"));
274 	return (status);
275 }
276 
277 /* _info(9E): return information about the loadable module */
278 int
279 _info(struct modinfo *modinfop)
280 {
281 	return (mod_info(&modlinkage, modinfop));
282 }
283 
284 /*
285  * attach(9E): attach a device to the system.
286  * called once for each instance of the device on the system.
287  */
288 static int
289 vnetattach(dev_info_t *dip, ddi_attach_cmd_t cmd)
290 {
291 	mac_t		*macp;
292 	vnet_t		*vnetp;
293 	vp_tl_t		*vp_tlp;
294 	int		instance;
295 	int		status;
296 	enum		{ AST_init = 0x0, AST_vnet_alloc = 0x1,
297 			    AST_mac_alloc = 0x2, AST_read_macaddr = 0x4,
298 			    AST_vgen_init = 0x8, AST_vptl_alloc = 0x10,
299 			    AST_fdbh_alloc = 0x20 }
300 			attach_state;
301 	mac_t		*vgenmacp = NULL;
302 	uint32_t	nfdbh = 0;
303 
304 	attach_state = AST_init;
305 
306 	switch (cmd) {
307 	case DDI_ATTACH:
308 		break;
309 	case DDI_RESUME:
310 	case DDI_PM_RESUME:
311 	default:
312 		goto vnet_attach_fail;
313 	}
314 
315 	instance = ddi_get_instance(dip);
316 	DBG1((NULL, "vnetattach: instance(%d) enter\n", instance));
317 
318 	/* allocate vnet_t and mac_t structures */
319 	vnetp = kmem_zalloc(sizeof (vnet_t), KM_SLEEP);
320 	attach_state |= AST_vnet_alloc;
321 
322 	macp = kmem_zalloc(sizeof (mac_t), KM_SLEEP);
323 	attach_state |= AST_mac_alloc;
324 
325 	/* setup links to vnet_t from both devinfo and mac_t */
326 	ddi_set_driver_private(dip, (caddr_t)vnetp);
327 	macp->m_driver = vnetp;
328 	vnetp->dip = dip;
329 	vnetp->macp = macp;
330 	vnetp->instance = instance;
331 
332 	/* read the mac address */
333 	status = vnet_read_mac_address(vnetp);
334 	if (status != DDI_SUCCESS) {
335 		goto vnet_attach_fail;
336 	}
337 	attach_state |= AST_read_macaddr;
338 
339 	/*
340 	 * Initialize the generic vnet proxy transport. This is the first
341 	 * and default transport used by vnet. The generic transport
342 	 * is provided by using sun4v LDC (logical domain channel). On success,
343 	 * vgen_init() provides a pointer to mac_t of generic transport.
344 	 * Currently, this generic layer provides network connectivity to other
345 	 * vnets within ldoms and also to remote hosts oustide ldoms through
346 	 * the virtual switch (vsw) device on domain0. In the future, when
347 	 * physical adapters that are able to share their resources (such as
348 	 * dma channels) with guest domains become available, the vnet device
349 	 * will use hardware specific driver to communicate directly over the
350 	 * physical device to reach remote hosts without going through vswitch.
351 	 */
352 	status = vgen_init(vnetp, vnetp->dip, vnetp->macp,
353 	    (uint8_t *)vnetp->curr_macaddr, &vgenmacp);
354 	if (status != DDI_SUCCESS) {
355 		DERR((vnetp, "vgen_init() failed\n"));
356 		goto vnet_attach_fail;
357 	}
358 	attach_state |= AST_vgen_init;
359 
360 	vp_tlp = kmem_zalloc(sizeof (vp_tl_t), KM_SLEEP);
361 	vp_tlp->macp = vgenmacp;
362 	(void) snprintf(vp_tlp->name, MAXNAMELEN, "%s%u", "vgen", instance);
363 	(void) strcpy(vnetp->vgen_name, vp_tlp->name);
364 
365 	/* add generic transport to the list of vnet proxy transports */
366 	vnet_add_vptl(vnetp, vp_tlp);
367 	attach_state |= AST_vptl_alloc;
368 
369 	nfdbh = vnet_nfdb_hash;
370 	if ((nfdbh < VNET_NFDB_HASH) || (nfdbh > VNET_NFDB_HASH_MAX)) {
371 		vnetp->nfdb_hash = VNET_NFDB_HASH;
372 	}
373 	else
374 		vnetp->nfdb_hash = nfdbh;
375 
376 	/* allocate fdb hash table, with an extra slot for default route */
377 	vnetp->fdbhp = kmem_zalloc(sizeof (fdb_fanout_t) *
378 	    (vnetp->nfdb_hash + 1), KM_SLEEP);
379 	attach_state |= AST_fdbh_alloc;
380 
381 	/* register with MAC layer */
382 	status = vnet_mac_register(vnetp);
383 	if (status != DDI_SUCCESS) {
384 		goto vnet_attach_fail;
385 	}
386 
387 	/* add to the list of vnet devices */
388 	WRITE_ENTER(&vnet_rw);
389 	vnetp->nextp = vnet_headp;
390 	vnet_headp = vnetp;
391 	RW_EXIT(&vnet_rw);
392 
393 	DBG1((NULL, "vnetattach: instance(%d) exit\n", instance));
394 	return (DDI_SUCCESS);
395 
396 vnet_attach_fail:
397 	if (attach_state & AST_fdbh_alloc) {
398 		kmem_free(vnetp->fdbhp,
399 		    sizeof (fdb_fanout_t) * (vnetp->nfdb_hash + 1));
400 	}
401 	if (attach_state & AST_vptl_alloc) {
402 		WRITE_ENTER(&vnetp->trwlock);
403 		vnet_del_vptl(vnetp, vp_tlp);
404 		RW_EXIT(&vnetp->trwlock);
405 	}
406 	if (attach_state & AST_vgen_init) {
407 		vgen_uninit(vgenmacp->m_driver);
408 	}
409 	if (attach_state & AST_mac_alloc) {
410 		KMEM_FREE(macp);
411 	}
412 	if (attach_state & AST_vnet_alloc) {
413 		KMEM_FREE(vnetp);
414 	}
415 	return (DDI_FAILURE);
416 }
417 
418 /*
419  * detach(9E): detach a device from the system.
420  */
421 static int
422 vnetdetach(dev_info_t *dip, ddi_detach_cmd_t cmd)
423 {
424 	vnet_t		*vnetp;
425 	vnet_t		**vnetpp;
426 	vp_tl_t		*vp_tlp;
427 	int		instance;
428 
429 	instance = ddi_get_instance(dip);
430 	DBG1((NULL, "vnetdetach: instance(%d) enter\n", instance));
431 
432 	vnetp = ddi_get_driver_private(dip);
433 	if (vnetp == NULL) {
434 		goto vnet_detach_fail;
435 	}
436 
437 	switch (cmd) {
438 	case DDI_DETACH:
439 		break;
440 	case DDI_SUSPEND:
441 	case DDI_PM_SUSPEND:
442 	default:
443 		goto vnet_detach_fail;
444 	}
445 
446 	/*
447 	 * Unregister from the MAC subsystem.  This can fail, in
448 	 * particular if there are DLPI style-2 streams still open -
449 	 * in which case we just return failure.
450 	 */
451 	if (mac_unregister(vnetp->macp) != 0)
452 		goto vnet_detach_fail;
453 
454 	/* unlink from instance(vnet_t) list */
455 	WRITE_ENTER(&vnet_rw);
456 	for (vnetpp = &vnet_headp; *vnetpp; vnetpp = &(*vnetpp)->nextp) {
457 		if (*vnetpp == vnetp) {
458 			*vnetpp = vnetp->nextp;
459 			break;
460 		}
461 	}
462 	RW_EXIT(&vnet_rw);
463 
464 	/* uninit and free vnet proxy transports */
465 	WRITE_ENTER(&vnetp->trwlock);
466 	while ((vp_tlp = vnetp->tlp) != NULL) {
467 		if (strcmp(vnetp->vgen_name, vp_tlp->name) == 0) {
468 			/* uninitialize generic transport */
469 			vgen_uninit(vp_tlp->macp->m_driver);
470 		}
471 		vnet_del_vptl(vnetp, vp_tlp);
472 	}
473 	RW_EXIT(&vnetp->trwlock);
474 
475 	KMEM_FREE(vnetp->macp);
476 	KMEM_FREE(vnetp);
477 
478 	return (DDI_SUCCESS);
479 
480 vnet_detach_fail:
481 	return (DDI_FAILURE);
482 }
483 
484 /* enable the device for transmit/receive */
485 static int
486 vnet_m_start(void *arg)
487 {
488 	vnet_t		*vnetp = arg;
489 	vp_tl_t		*vp_tlp;
490 	mac_t		*vp_macp;
491 
492 	DBG1((vnetp, "vnet_m_start: enter\n"));
493 
494 	/*
495 	 * XXX
496 	 * Currently, we only have generic transport. m_start() invokes
497 	 * vgen_start() which enables ports/channels in vgen and
498 	 * initiates handshake with peer vnets and vsw. In the future when we
499 	 * have support for hardware specific transports, this information
500 	 * needs to be propagted back to vnet from vgen and we need to revisit
501 	 * this code (see comments in vnet_attach()).
502 	 *
503 	 */
504 	WRITE_ENTER(&vnetp->trwlock);
505 	for (vp_tlp = vnetp->tlp; vp_tlp != NULL; vp_tlp = vp_tlp->nextp) {
506 		vp_macp = vp_tlp->macp;
507 		vp_macp->m_start(vp_macp->m_driver);
508 	}
509 	RW_EXIT(&vnetp->trwlock);
510 
511 	DBG1((vnetp, "vnet_m_start: exit\n"));
512 	return (VNET_SUCCESS);
513 
514 }
515 
516 /* stop transmit/receive for the device */
517 static void
518 vnet_m_stop(void *arg)
519 {
520 	vnet_t		*vnetp = arg;
521 	vp_tl_t		*vp_tlp;
522 	mac_t		*vp_macp;
523 
524 	DBG1((vnetp, "vnet_m_stop: enter\n"));
525 
526 	WRITE_ENTER(&vnetp->trwlock);
527 	for (vp_tlp = vnetp->tlp; vp_tlp != NULL; vp_tlp = vp_tlp->nextp) {
528 		vp_macp = vp_tlp->macp;
529 		vp_macp->m_stop(vp_macp->m_driver);
530 	}
531 	RW_EXIT(&vnetp->trwlock);
532 
533 	DBG1((vnetp, "vnet_m_stop: exit\n"));
534 }
535 
536 /* set the unicast mac address of the device */
537 static int
538 vnet_m_unicst(void *arg, const uint8_t *macaddr)
539 {
540 	_NOTE(ARGUNUSED(macaddr))
541 
542 	vnet_t *vnetp = arg;
543 
544 	DBG1((vnetp, "vnet_m_unicst: enter\n"));
545 	/*
546 	 * XXX: setting mac address dynamically is not supported.
547 	 */
548 	DBG1((vnetp, "vnet_m_unicst: exit\n"));
549 
550 	return (VNET_FAILURE);
551 }
552 
553 /* enable/disable a multicast address */
554 static int
555 vnet_m_multicst(void *arg, boolean_t add, const uint8_t *mca)
556 {
557 	_NOTE(ARGUNUSED(add, mca))
558 
559 	vnet_t *vnetp = arg;
560 	vp_tl_t		*vp_tlp;
561 	mac_t		*vp_macp;
562 	int rv = VNET_SUCCESS;
563 
564 	DBG1((vnetp, "vnet_m_multicst: enter\n"));
565 	READ_ENTER(&vnetp->trwlock);
566 	for (vp_tlp = vnetp->tlp; vp_tlp != NULL; vp_tlp = vp_tlp->nextp) {
567 		if (strcmp(vnetp->vgen_name, vp_tlp->name) == 0) {
568 			vp_macp = vp_tlp->macp;
569 			rv = vp_macp->m_multicst(vp_macp->m_driver, add, mca);
570 			break;
571 		}
572 	}
573 	RW_EXIT(&vnetp->trwlock);
574 	DBG1((vnetp, "vnet_m_multicst: exit\n"));
575 	return (rv);
576 }
577 
578 /* set or clear promiscuous mode on the device */
579 static int
580 vnet_m_promisc(void *arg, boolean_t on)
581 {
582 	_NOTE(ARGUNUSED(on))
583 
584 	vnet_t *vnetp = arg;
585 	DBG1((vnetp, "vnet_m_promisc: enter\n"));
586 	/*
587 	 * XXX: setting promiscuous mode is not supported, just return success.
588 	 */
589 	DBG1((vnetp, "vnet_m_promisc: exit\n"));
590 	return (VNET_SUCCESS);
591 }
592 
593 /*
594  * Transmit a chain of packets. This function provides switching functionality
595  * based on the destination mac address to reach other guests (within ldoms) or
596  * external hosts.
597  */
598 mblk_t *
599 vnet_m_tx(void *arg, mblk_t *mp)
600 {
601 	vnet_t *vnetp;
602 	mblk_t *next;
603 	uint32_t fdbhash;
604 	fdb_t *fdbp;
605 	fdb_fanout_t *fdbhp;
606 	struct ether_header *ehp;
607 	uint8_t *macaddr;
608 	mblk_t *resid_mp;
609 
610 	vnetp = (vnet_t *)arg;
611 	DBG1((vnetp, "vnet_m_tx: enter\n"));
612 	ASSERT(mp != NULL);
613 
614 	while (mp != NULL) {
615 		next = mp->b_next;
616 		mp->b_next = NULL;
617 
618 		/* get the destination mac address in the eth header */
619 		ehp = (struct ether_header *)mp->b_rptr;
620 		macaddr = (uint8_t *)&ehp->ether_dhost;
621 
622 		/* Calculate hash value and fdb fanout */
623 		fdbhash = MACHASH(macaddr, vnetp->nfdb_hash);
624 		fdbhp = &(vnetp->fdbhp[fdbhash]);
625 
626 		READ_ENTER(&fdbhp->rwlock);
627 		fdbp = vnet_lookup_fdb(fdbhp, macaddr);
628 		if (fdbp) {
629 			/*
630 			 * If the destination is in FDB, the destination is
631 			 * a vnet device within ldoms and directly reachable,
632 			 * invoke the tx function in the fdb entry.
633 			 */
634 			resid_mp = fdbp->m_tx(fdbp->txarg, mp);
635 			if (resid_mp != NULL) {
636 				/* m_tx failed */
637 				mp->b_next = next;
638 				RW_EXIT(&fdbhp->rwlock);
639 				break;
640 			}
641 			RW_EXIT(&fdbhp->rwlock);
642 		} else {
643 			/* destination is not in FDB */
644 			RW_EXIT(&fdbhp->rwlock);
645 			/*
646 			 * If the destination is broadcast/multicast
647 			 * or an unknown unicast address, forward the
648 			 * packet to vsw, using the last slot in fdb which is
649 			 * reserved for default route.
650 			 */
651 			fdbhp = &(vnetp->fdbhp[vnetp->nfdb_hash]);
652 			READ_ENTER(&fdbhp->rwlock);
653 			fdbp = fdbhp->headp;
654 			if (fdbp) {
655 				resid_mp = fdbp->m_tx(fdbp->txarg, mp);
656 				if (resid_mp != NULL) {
657 					/* m_tx failed */
658 					mp->b_next = next;
659 					RW_EXIT(&fdbhp->rwlock);
660 					break;
661 				}
662 			} else {
663 				/* drop the packet */
664 				freemsg(mp);
665 			}
666 			RW_EXIT(&fdbhp->rwlock);
667 		}
668 
669 		mp = next;
670 	}
671 
672 	DBG1((vnetp, "vnet_m_tx: exit\n"));
673 	return (mp);
674 }
675 
676 /* register resources with mac layer */
677 static void
678 vnet_m_resources(void *arg)
679 {
680 	vnet_t *vnetp = arg;
681 	vp_tl_t	*vp_tlp;
682 	mac_t	*vp_macp;
683 
684 	DBG1((vnetp, "vnet_m_resources: enter\n"));
685 
686 	WRITE_ENTER(&vnetp->trwlock);
687 	for (vp_tlp = vnetp->tlp; vp_tlp != NULL; vp_tlp = vp_tlp->nextp) {
688 		vp_macp = vp_tlp->macp;
689 		vp_macp->m_resources(vp_macp->m_driver);
690 	}
691 	RW_EXIT(&vnetp->trwlock);
692 
693 	DBG1((vnetp, "vnet_m_resources: exit\n"));
694 }
695 
696 /*
697  * vnet specific ioctls
698  */
699 static void
700 vnet_m_ioctl(void *arg, queue_t *wq, mblk_t *mp)
701 {
702 	vnet_t *vnetp = (vnet_t *)arg;
703 	struct iocblk *iocp;
704 	int cmd;
705 
706 	DBG1((vnetp, "vnet_m_ioctl: enter\n"));
707 
708 	iocp = (struct iocblk *)mp->b_rptr;
709 	iocp->ioc_error = 0;
710 	cmd = iocp->ioc_cmd;
711 	switch (cmd) {
712 	default:
713 		miocnak(wq, mp, 0, EINVAL);
714 		break;
715 	}
716 	DBG1((vnetp, "vnet_m_ioctl: exit\n"));
717 }
718 
719 /* get statistics from the device */
720 uint64_t
721 vnet_m_stat(void *arg, enum mac_stat stat)
722 {
723 	vnet_t *vnetp = arg;
724 	vp_tl_t	*vp_tlp;
725 	mac_t	*vp_macp;
726 	uint64_t val = 0;
727 
728 	DBG1((vnetp, "vnet_m_stat: enter\n"));
729 
730 	/*
731 	 * get the specified statistic from each transport
732 	 * and return the aggregate val
733 	 */
734 	READ_ENTER(&vnetp->trwlock);
735 	for (vp_tlp = vnetp->tlp; vp_tlp != NULL; vp_tlp = vp_tlp->nextp) {
736 		vp_macp = vp_tlp->macp;
737 		val += vp_macp->m_stat(vp_macp->m_driver, stat);
738 	}
739 	RW_EXIT(&vnetp->trwlock);
740 
741 	DBG1((vnetp, "vnet_m_stat: exit\n"));
742 	return (val);
743 }
744 
745 /* wrapper function for mac_register() */
746 static int
747 vnet_mac_register(vnet_t *vnetp)
748 {
749 	mac_info_t *mip;
750 	mac_t *macp;
751 
752 	macp = vnetp->macp;
753 
754 	mip = &(macp->m_info);
755 	mip->mi_media = DL_ETHER;
756 	mip->mi_sdu_min = 0;
757 	mip->mi_sdu_max = ETHERMTU;
758 	mip->mi_cksum = 0;
759 	mip->mi_poll = 0; /* DL_CAPAB_POLL ? */
760 	mip->mi_addr_length = ETHERADDRL;
761 	bcopy(&etherbroadcastaddr, mip->mi_brdcst_addr, ETHERADDRL);
762 	bcopy(vnetp->curr_macaddr, mip->mi_unicst_addr, ETHERADDRL);
763 
764 	MAC_STAT_MIB(mip->mi_stat);
765 	mip->mi_stat[MAC_STAT_UNKNOWNS] = B_FALSE;
766 	MAC_STAT_ETHER(mip->mi_stat);
767 	mip->mi_stat[MAC_STAT_SQE_ERRORS] = B_FALSE;
768 	mip->mi_stat[MAC_STAT_MACRCV_ERRORS] = B_FALSE;
769 
770 	macp->m_stat = vnet_m_stat;
771 	macp->m_start = vnet_m_start;
772 	macp->m_stop = vnet_m_stop;
773 	macp->m_promisc = vnet_m_promisc;
774 	macp->m_multicst = vnet_m_multicst;
775 	macp->m_unicst = vnet_m_unicst;
776 	macp->m_resources = vnet_m_resources;
777 	macp->m_ioctl = vnet_m_ioctl;
778 	macp->m_tx = vnet_m_tx;
779 
780 	macp->m_dip = vnetp->dip;
781 	macp->m_ident = MAC_IDENT;
782 
783 	/*
784 	 * Finally, we're ready to register ourselves with the MAC layer
785 	 * interface; if this succeeds, we're all ready to start()
786 	 */
787 	if (mac_register(macp) != 0) {
788 		KMEM_FREE(macp);
789 		return (DDI_FAILURE);
790 	}
791 
792 	return (DDI_SUCCESS);
793 }
794 
795 /* add vp_tl to the list */
796 static void
797 vnet_add_vptl(vnet_t *vnetp, vp_tl_t *vp_tlp)
798 {
799 	vp_tl_t *ttlp;
800 
801 	WRITE_ENTER(&vnetp->trwlock);
802 	if (vnetp->tlp == NULL) {
803 		vnetp->tlp = vp_tlp;
804 	} else {
805 		ttlp = vnetp->tlp;
806 		while (ttlp->nextp)
807 			ttlp = ttlp->nextp;
808 		ttlp->nextp = vp_tlp;
809 	}
810 	RW_EXIT(&vnetp->trwlock);
811 }
812 
813 /* remove vp_tl from the list */
814 static void
815 vnet_del_vptl(vnet_t *vnetp, vp_tl_t *vp_tlp)
816 {
817 	vp_tl_t *ttlp, **pretlp;
818 	boolean_t found = B_FALSE;
819 
820 	pretlp = &vnetp->tlp;
821 	ttlp = *pretlp;
822 	while (ttlp) {
823 		if (ttlp == vp_tlp) {
824 			found = B_TRUE;
825 			(*pretlp) = ttlp->nextp;
826 			ttlp->nextp = NULL;
827 			break;
828 		}
829 		pretlp = &(ttlp->nextp);
830 		ttlp = *pretlp;
831 	}
832 
833 	if (found) {
834 		KMEM_FREE(vp_tlp);
835 	}
836 }
837 
838 /* get vp_tl corresponding to the given name */
839 static vp_tl_t *
840 vnet_get_vptl(vnet_t *vnetp, const char *name)
841 {
842 	vp_tl_t *tlp;
843 
844 	tlp = vnetp->tlp;
845 	while (tlp) {
846 		if (strcmp(tlp->name, name) == 0) {
847 			return (tlp);
848 		}
849 		tlp = tlp->nextp;
850 	}
851 	DWARN((vnetp,
852 	    "vnet_get_vptl: can't find vp_tl with name (%s)\n", name));
853 	return (NULL);
854 }
855 
856 /* read the mac address of the device */
857 static int
858 vnet_read_mac_address(vnet_t *vnetp)
859 {
860 	uchar_t 	*macaddr;
861 	uint32_t 	size;
862 	int 		rv;
863 
864 	rv = ddi_prop_lookup_byte_array(DDI_DEV_T_ANY, vnetp->dip,
865 		DDI_PROP_DONTPASS, macaddr_propname, &macaddr, &size);
866 	if ((rv != DDI_PROP_SUCCESS) || (size != ETHERADDRL)) {
867 		DWARN((vnetp,
868 		"vnet_read_mac_address: prop_lookup failed (%s) err (%d)\n",
869 		macaddr_propname, rv));
870 		return (DDI_FAILURE);
871 	}
872 	bcopy(macaddr, (caddr_t)vnetp->vendor_addr, ETHERADDRL);
873 	bcopy(macaddr, (caddr_t)vnetp->curr_macaddr, ETHERADDRL);
874 	ddi_prop_free(macaddr);
875 
876 	return (DDI_SUCCESS);
877 }
878 
879 
880 /*
881  * Functions below are called only by generic transport to add/remove/modify
882  * entries in forwarding database. See comments in vgen_port_init(vnet_gen.c).
883  */
884 
885 /* add an entry into the forwarding database */
886 void
887 vnet_add_fdb(void *arg, uint8_t *macaddr, mac_tx_t m_tx, void *txarg)
888 {
889 	vnet_t *vnetp = (vnet_t *)arg;
890 	uint32_t fdbhash;
891 	fdb_t *fdbp;
892 	fdb_fanout_t *fdbhp;
893 
894 	/* Calculate hash value and fdb fanout */
895 	fdbhash = MACHASH(macaddr, vnetp->nfdb_hash);
896 	fdbhp = &(vnetp->fdbhp[fdbhash]);
897 
898 	WRITE_ENTER(&fdbhp->rwlock);
899 
900 	fdbp = kmem_zalloc(sizeof (fdb_t), KM_NOSLEEP);
901 	if (fdbp == NULL) {
902 		RW_EXIT(&fdbhp->rwlock);
903 		return;
904 	}
905 	bcopy(macaddr, (caddr_t)fdbp->macaddr, ETHERADDRL);
906 	fdbp->m_tx = m_tx;
907 	fdbp->txarg = txarg;
908 	fdbp->nextp = fdbhp->headp;
909 	fdbhp->headp = fdbp;
910 
911 	RW_EXIT(&fdbhp->rwlock);
912 }
913 
914 /* delete an entry from the forwarding database */
915 void
916 vnet_del_fdb(void *arg, uint8_t *macaddr)
917 {
918 	vnet_t *vnetp = (vnet_t *)arg;
919 	uint32_t fdbhash;
920 	fdb_t *fdbp;
921 	fdb_t **pfdbp;
922 	fdb_fanout_t *fdbhp;
923 
924 	/* Calculate hash value and fdb fanout */
925 	fdbhash = MACHASH(macaddr, vnetp->nfdb_hash);
926 	fdbhp = &(vnetp->fdbhp[fdbhash]);
927 
928 	WRITE_ENTER(&fdbhp->rwlock);
929 
930 	for (pfdbp = &fdbhp->headp; (fdbp  = *pfdbp) != NULL;
931 	    pfdbp = &fdbp->nextp) {
932 		if (bcmp(fdbp->macaddr, macaddr, ETHERADDRL) == 0) {
933 			/* Unlink it from the list */
934 			*pfdbp = fdbp->nextp;
935 			KMEM_FREE(fdbp);
936 			break;
937 		}
938 	}
939 
940 	RW_EXIT(&fdbhp->rwlock);
941 }
942 
943 /* modify an existing entry in the forwarding database */
944 void
945 vnet_modify_fdb(void *arg, uint8_t *macaddr, mac_tx_t m_tx, void *txarg)
946 {
947 	vnet_t *vnetp = (vnet_t *)arg;
948 	uint32_t fdbhash;
949 	fdb_t *fdbp;
950 	fdb_fanout_t *fdbhp;
951 
952 	/* Calculate hash value and fdb fanout */
953 	fdbhash = MACHASH(macaddr, vnetp->nfdb_hash);
954 	fdbhp = &(vnetp->fdbhp[fdbhash]);
955 
956 	WRITE_ENTER(&fdbhp->rwlock);
957 
958 	for (fdbp = fdbhp->headp; fdbp != NULL; fdbp = fdbp->nextp) {
959 		if (bcmp(fdbp->macaddr, macaddr, ETHERADDRL) == 0) {
960 			/* change the entry to have new tx params */
961 			fdbp->m_tx = m_tx;
962 			fdbp->txarg = txarg;
963 			break;
964 		}
965 	}
966 
967 	RW_EXIT(&fdbhp->rwlock);
968 }
969 
970 /* look up an fdb entry based on the mac address, caller holds lock */
971 static fdb_t *
972 vnet_lookup_fdb(fdb_fanout_t *fdbhp, uint8_t *macaddr)
973 {
974 	fdb_t *fdbp = NULL;
975 
976 	for (fdbp = fdbhp->headp; fdbp != NULL; fdbp = fdbp->nextp) {
977 		if (bcmp(fdbp->macaddr, macaddr, ETHERADDRL) == 0) {
978 			break;
979 		}
980 	}
981 
982 	return (fdbp);
983 }
984 
985 /* add default route entry into the forwarding database */
986 void
987 vnet_add_def_rte(void *arg, mac_tx_t m_tx, void *txarg)
988 {
989 	vnet_t *vnetp = (vnet_t *)arg;
990 	fdb_t *fdbp;
991 	fdb_fanout_t *fdbhp;
992 
993 	/*
994 	 * The last hash list is reserved for default route entry,
995 	 * and for now, we have only one entry in this list.
996 	 */
997 	fdbhp = &(vnetp->fdbhp[vnetp->nfdb_hash]);
998 
999 	WRITE_ENTER(&fdbhp->rwlock);
1000 
1001 	if (fdbhp->headp) {
1002 		DWARN((vnetp,
1003 		    "vnet_add_def_rte: default rte already exists\n"));
1004 		RW_EXIT(&fdbhp->rwlock);
1005 		return;
1006 	}
1007 	fdbp = kmem_zalloc(sizeof (fdb_t), KM_NOSLEEP);
1008 	if (fdbp == NULL) {
1009 		RW_EXIT(&fdbhp->rwlock);
1010 		return;
1011 	}
1012 	bzero(fdbp->macaddr, ETHERADDRL);
1013 	fdbp->m_tx = m_tx;
1014 	fdbp->txarg = txarg;
1015 	fdbp->nextp = NULL;
1016 	fdbhp->headp = fdbp;
1017 
1018 	RW_EXIT(&fdbhp->rwlock);
1019 }
1020 
1021 /* delete default route entry from the forwarding database */
1022 void
1023 vnet_del_def_rte(void *arg)
1024 {
1025 	vnet_t *vnetp = (vnet_t *)arg;
1026 	fdb_t *fdbp;
1027 	fdb_fanout_t *fdbhp;
1028 
1029 	/*
1030 	 * The last hash list is reserved for default route entry,
1031 	 * and for now, we have only one entry in this list.
1032 	 */
1033 	fdbhp = &(vnetp->fdbhp[vnetp->nfdb_hash]);
1034 
1035 	WRITE_ENTER(&fdbhp->rwlock);
1036 
1037 	if (fdbhp->headp == NULL) {
1038 		RW_EXIT(&fdbhp->rwlock);
1039 		return;
1040 	}
1041 	fdbp = fdbhp->headp;
1042 	KMEM_FREE(fdbp);
1043 	fdbhp->headp = NULL;
1044 
1045 	RW_EXIT(&fdbhp->rwlock);
1046 }
1047