xref: /illumos-gate/usr/src/uts/sun4v/io/vnet.c (revision d12abe7ce2663ac39e686a14960eb4febf560195)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <sys/types.h>
30 #include <sys/errno.h>
31 #include <sys/param.h>
32 #include <sys/stream.h>
33 #include <sys/kmem.h>
34 #include <sys/conf.h>
35 #include <sys/devops.h>
36 #include <sys/ksynch.h>
37 #include <sys/stat.h>
38 #include <sys/modctl.h>
39 #include <sys/debug.h>
40 #include <sys/ethernet.h>
41 #include <sys/dlpi.h>
42 #include <net/if.h>
43 #include <sys/mac.h>
44 #include <sys/ddi.h>
45 #include <sys/sunddi.h>
46 #include <sys/strsun.h>
47 #include <sys/note.h>
48 #include <sys/vnet.h>
49 
50 /*
51  * Function prototypes.
52  */
53 
54 /* DDI entrypoints */
55 static int vnetdevinfo(dev_info_t *, ddi_info_cmd_t, void *, void **);
56 static int vnetattach(dev_info_t *, ddi_attach_cmd_t);
57 static int vnetdetach(dev_info_t *, ddi_detach_cmd_t);
58 
59 /* MAC entrypoints  */
60 static uint64_t vnet_m_stat(void *arg, enum mac_stat stat);
61 static int vnet_m_start(void *);
62 static void vnet_m_stop(void *);
63 static int vnet_m_promisc(void *, boolean_t);
64 static int vnet_m_multicst(void *, boolean_t, const uint8_t *);
65 static int vnet_m_unicst(void *, const uint8_t *);
66 static void vnet_m_resources(void *);
67 static void vnet_m_ioctl(void *, queue_t *, mblk_t *);
68 mblk_t *vnet_m_tx(void *, mblk_t *);
69 
70 /* vnet internal functions */
71 static int vnet_mac_register(vnet_t *);
72 static int vnet_read_mac_address(vnet_t *vnetp);
73 static void vnet_add_vptl(vnet_t *vnetp, vp_tl_t *vp_tlp);
74 static void vnet_del_vptl(vnet_t *vnetp, vp_tl_t *vp_tlp);
75 static vp_tl_t *vnet_get_vptl(vnet_t *vnetp, const char *devname);
76 static fdb_t *vnet_lookup_fdb(fdb_fanout_t *fdbhp, uint8_t *macaddr);
77 
78 /* exported functions */
79 void vnet_add_fdb(void *arg, uint8_t *macaddr, mac_tx_t m_tx, void *txarg);
80 void vnet_del_fdb(void *arg, uint8_t *macaddr);
81 void vnet_modify_fdb(void *arg, uint8_t *macaddr, mac_tx_t m_tx, void *txarg);
82 void vnet_add_def_rte(void *arg, mac_tx_t m_tx, void *txarg);
83 void vnet_del_def_rte(void *arg);
84 
85 /* externs */
86 extern int vgen_init(void *vnetp, dev_info_t *vnetdip, void *vnetmacp,
87 	const uint8_t *macaddr, mac_t **vgenmacp);
88 extern void vgen_uninit(void *arg);
89 
90 /*
91  * Linked list of "vnet_t" structures - one per instance.
92  */
93 static vnet_t	*vnet_headp = NULL;
94 static krwlock_t vnet_rw;
95 
96 /* Tunables */
97 uint32_t vnet_ntxds = VNET_NTXDS;	/* power of 2 transmit descriptors */
98 uint32_t vnet_reclaim_lowat = VNET_RECLAIM_LOWAT;  /* tx recl low watermark */
99 uint32_t vnet_reclaim_hiwat = VNET_RECLAIM_HIWAT;  /* tx recl high watermark */
100 uint32_t vnet_ldcwd_interval = VNET_LDCWD_INTERVAL; /* watchdog freq in msec */
101 uint32_t vnet_ldcwd_txtimeout = VNET_LDCWD_TXTIMEOUT;  /* tx timeout in msec */
102 uint32_t vnet_ldc_qlen = VNET_LDC_QLEN;		/* ldc qlen */
103 uint32_t vnet_nfdb_hash = VNET_NFDB_HASH;	/* size of fdb hash table */
104 
105 /*
106  * Property names
107  */
108 static char macaddr_propname[] = "local-mac-address";
109 
110 static struct ether_addr etherbroadcastaddr = {
111 	0xff, 0xff, 0xff, 0xff, 0xff, 0xff
112 };
113 
/*
 * MIB II broadcast/multicast packets
 *
 * Both macros take a pointer to a struct ether_header.  The argument is
 * parenthesized so that an expression such as "ehp + 1" can be passed.
 *
 * IS_BROADCAST: destination equals the all-ones broadcast address.
 * IS_MULTICAST: low-order bit of the first destination octet is set.
 */
#define	IS_BROADCAST(ehp) \
		(ether_cmp(&(ehp)->ether_dhost, &etherbroadcastaddr) == 0)
#define	IS_MULTICAST(ehp) \
		(((ehp)->ether_dhost.ether_addr_octet[0] & 01) == 1)
121 
122 /*
123  * This is the string displayed by modinfo(1m).
124  */
125 static char vnet_ident[] = "vnet driver v1.0";
126 extern struct mod_ops mod_driverops;
127 static struct cb_ops cb_vnetops = {
128 	nulldev,		/* cb_open */
129 	nulldev,		/* cb_close */
130 	nodev,			/* cb_strategy */
131 	nodev,			/* cb_print */
132 	nodev,			/* cb_dump */
133 	nodev,			/* cb_read */
134 	nodev,			/* cb_write */
135 	nodev,			/* cb_ioctl */
136 	nodev,			/* cb_devmap */
137 	nodev,			/* cb_mmap */
138 	nodev,			/* cb_segmap */
139 	nochpoll,		/* cb_chpoll */
140 	ddi_prop_op,		/* cb_prop_op */
141 	NULL,			/* cb_stream */
142 	(int)(D_MP)		/* cb_flag */
143 };
144 
145 static struct dev_ops vnetops = {
146 	DEVO_REV,		/* devo_rev */
147 	0,			/* devo_refcnt */
148 	NULL,			/* devo_getinfo */
149 	nulldev,		/* devo_identify */
150 	nulldev,		/* devo_probe */
151 	vnetattach,		/* devo_attach */
152 	vnetdetach,		/* devo_detach */
153 	nodev,			/* devo_reset */
154 	&cb_vnetops,		/* devo_cb_ops */
155 	(struct bus_ops *)NULL	/* devo_bus_ops */
156 };
157 
158 static struct modldrv modldrv = {
159 	&mod_driverops,		/* Type of module.  This one is a driver */
160 	vnet_ident,		/* ID string */
161 	&vnetops		/* driver specific ops */
162 };
163 
164 static struct modlinkage modlinkage = {
165 	MODREV_1, (void *)&modldrv, NULL
166 };
167 
168 
169 /*
170  * Print debug messages - set to 0xf to enable all msgs
171  */
172 int _vnet_dbglevel = 0x8;
173 
174 void
175 _vnetdebug_printf(void *arg, const char *fmt, ...)
176 {
177 	char    buf[512];
178 	va_list ap;
179 	vnet_t *vnetp = (vnet_t *)arg;
180 
181 	va_start(ap, fmt);
182 	(void) vsprintf(buf, fmt, ap);
183 	va_end(ap);
184 
185 	if (vnetp == NULL)
186 		cmn_err(CE_CONT, "%s\n", buf);
187 	else
188 		cmn_err(CE_CONT, "vnet%d: %s\n", vnetp->instance, buf);
189 }
190 
/*
 * Debug message macros.  Each takes a single, fully parenthesized argument
 * list for _vnetdebug_printf(), e.g. DBG1((vnetp, "fmt\n", args)).
 *
 * Every variant expands to a single do { } while (0) statement so that it
 * can be used safely as the body of an unbraced if/else.
 */
#ifdef DEBUG

/*
 * XXX: any changes to the definitions below need corresponding changes in
 * vnet_gen.c
 */

/*
 * debug levels:
 * DBG_LEVEL1:	Function entry/exit tracing
 * DBG_LEVEL2:	Info messages
 * DBG_LEVEL3:	Warning messages
 * DBG_LEVEL4:	Error messages
 */

enum	{ DBG_LEVEL1 = 0x01, DBG_LEVEL2 = 0x02, DBG_LEVEL3 = 0x04,
	    DBG_LEVEL4 = 0x08 };

#define	DBG1(_s)	do {						\
			    if ((_vnet_dbglevel & DBG_LEVEL1) != 0) {	\
					_vnetdebug_printf _s;		\
			    }					\
			_NOTE(CONSTCOND) } while (0)

#define	DBG2(_s)	do {						\
			    if ((_vnet_dbglevel & DBG_LEVEL2) != 0) {	\
					_vnetdebug_printf _s;		\
			    }					\
			_NOTE(CONSTCOND) } while (0)

#define	DWARN(_s)	do {						\
			    if ((_vnet_dbglevel & DBG_LEVEL3) != 0) {	\
					_vnetdebug_printf _s;		\
			    }					\
			_NOTE(CONSTCOND) } while (0)

#define	DERR(_s)	do {						\
			    if ((_vnet_dbglevel & DBG_LEVEL4) != 0) {	\
					_vnetdebug_printf _s;		\
			    }					\
			_NOTE(CONSTCOND) } while (0)

#else

/*
 * Non-DEBUG builds: the call is still compiled, so its arguments remain
 * type checked, but it sits behind a constant-false test and never runs.
 */

#define	DBG1(_s)	do {						\
			    if (0) {					\
					_vnetdebug_printf _s;		\
			    }						\
			_NOTE(CONSTCOND) } while (0)

#define	DBG2(_s)	do {						\
			    if (0) {					\
					_vnetdebug_printf _s;		\
			    }						\
			_NOTE(CONSTCOND) } while (0)

#define	DWARN(_s)	do {						\
			    if (0) {					\
					_vnetdebug_printf _s;		\
			    }						\
			_NOTE(CONSTCOND) } while (0)

#define	DERR(_s)	do {						\
			    if (0) {					\
					_vnetdebug_printf _s;		\
			    }						\
			_NOTE(CONSTCOND) } while (0)

#endif
241 
242 /* _init(9E): initialize the loadable module */
243 int
244 _init(void)
245 {
246 	int status;
247 
248 	DBG1((NULL, "_init: enter\n"));
249 
250 	mac_init_ops(&vnetops, "vnet");
251 	status = mod_install(&modlinkage);
252 	if (status != 0) {
253 		mac_fini_ops(&vnetops);
254 	}
255 
256 	DBG1((NULL, "_init: exit\n"));
257 	return (status);
258 }
259 
260 /* _fini(9E): prepare the module for unloading. */
261 int
262 _fini(void)
263 {
264 	int status;
265 
266 	DBG1((NULL, "_fini: enter\n"));
267 
268 	status = mod_remove(&modlinkage);
269 	if (status != 0)
270 		return (status);
271 	mac_fini_ops(&vnetops);
272 
273 	DBG1((NULL, "_fini: exit\n"));
274 	return (status);
275 }
276 
277 /* _info(9E): return information about the loadable module */
278 int
279 _info(struct modinfo *modinfop)
280 {
281 	return (mod_info(&modlinkage, modinfop));
282 }
283 
284 /*
285  * attach(9E): attach a device to the system.
286  * called once for each instance of the device on the system.
287  */
288 static int
289 vnetattach(dev_info_t *dip, ddi_attach_cmd_t cmd)
290 {
291 	mac_t		*macp;
292 	vnet_t		*vnetp;
293 	vp_tl_t		*vp_tlp;
294 	int		instance;
295 	int		status;
296 	enum		{ AST_init = 0x0, AST_vnet_alloc = 0x1,
297 			    AST_mac_alloc = 0x2, AST_read_macaddr = 0x4,
298 			    AST_vgen_init = 0x8, AST_vptl_alloc = 0x10,
299 			    AST_fdbh_alloc = 0x20 }
300 			attach_state;
301 	mac_t		*vgenmacp = NULL;
302 	uint32_t	nfdbh = 0;
303 
304 	attach_state = AST_init;
305 
306 	switch (cmd) {
307 	case DDI_ATTACH:
308 		break;
309 	case DDI_RESUME:
310 	case DDI_PM_RESUME:
311 	default:
312 		goto vnet_attach_fail;
313 	}
314 
315 	instance = ddi_get_instance(dip);
316 	DBG1((NULL, "vnetattach: instance(%d) enter\n", instance));
317 
318 	/* allocate vnet_t and mac_t structures */
319 	vnetp = kmem_zalloc(sizeof (vnet_t), KM_SLEEP);
320 	attach_state |= AST_vnet_alloc;
321 
322 	macp = kmem_zalloc(sizeof (mac_t), KM_SLEEP);
323 	attach_state |= AST_mac_alloc;
324 
325 	/* setup links to vnet_t from both devinfo and mac_t */
326 	ddi_set_driver_private(dip, (caddr_t)vnetp);
327 	macp->m_driver = vnetp;
328 	vnetp->dip = dip;
329 	vnetp->macp = macp;
330 	vnetp->instance = instance;
331 
332 	/* read the mac address */
333 	status = vnet_read_mac_address(vnetp);
334 	if (status != DDI_SUCCESS) {
335 		goto vnet_attach_fail;
336 	}
337 	attach_state |= AST_read_macaddr;
338 
339 	/*
340 	 * Initialize the generic vnet proxy transport. This is the first
341 	 * and default transport used by vnet. The generic transport
342 	 * is provided by using sun4v LDC (logical domain channel). On success,
343 	 * vgen_init() provides a pointer to mac_t of generic transport.
344 	 * Currently, this generic layer provides network connectivity to other
345 	 * vnets within ldoms and also to remote hosts oustide ldoms through
346 	 * the virtual switch (vsw) device on domain0. In the future, when
347 	 * physical adapters that are able to share their resources (such as
348 	 * dma channels) with guest domains become available, the vnet device
349 	 * will use hardware specific driver to communicate directly over the
350 	 * physical device to reach remote hosts without going through vswitch.
351 	 */
352 	status = vgen_init(vnetp, vnetp->dip, vnetp->macp,
353 	    (uint8_t *)vnetp->curr_macaddr, &vgenmacp);
354 	if (status != DDI_SUCCESS) {
355 		DERR((vnetp, "vgen_init() failed\n"));
356 		goto vnet_attach_fail;
357 	}
358 	attach_state |= AST_vgen_init;
359 
360 	vp_tlp = kmem_zalloc(sizeof (vp_tl_t), KM_SLEEP);
361 	vp_tlp->macp = vgenmacp;
362 	(void) snprintf(vp_tlp->name, MAXNAMELEN, "%s%u", "vgen", instance);
363 	(void) strcpy(vnetp->vgen_name, vp_tlp->name);
364 
365 	/* add generic transport to the list of vnet proxy transports */
366 	vnet_add_vptl(vnetp, vp_tlp);
367 	attach_state |= AST_vptl_alloc;
368 
369 	nfdbh = vnet_nfdb_hash;
370 	if ((nfdbh < VNET_NFDB_HASH) || (nfdbh > VNET_NFDB_HASH_MAX)) {
371 		vnetp->nfdb_hash = VNET_NFDB_HASH;
372 	}
373 	else
374 		vnetp->nfdb_hash = nfdbh;
375 
376 	/* allocate fdb hash table, with an extra slot for default route */
377 	vnetp->fdbhp = kmem_zalloc(sizeof (fdb_fanout_t) *
378 	    (vnetp->nfdb_hash + 1), KM_SLEEP);
379 	attach_state |= AST_fdbh_alloc;
380 
381 	/* register with MAC layer */
382 	status = vnet_mac_register(vnetp);
383 	if (status != DDI_SUCCESS) {
384 		goto vnet_attach_fail;
385 	}
386 
387 	/* add to the list of vnet devices */
388 	WRITE_ENTER(&vnet_rw);
389 	vnetp->nextp = vnet_headp;
390 	vnet_headp = vnetp;
391 	RW_EXIT(&vnet_rw);
392 
393 	DBG1((NULL, "vnetattach: instance(%d) exit\n", instance));
394 	return (DDI_SUCCESS);
395 
396 vnet_attach_fail:
397 	if (attach_state & AST_fdbh_alloc) {
398 		kmem_free(vnetp->fdbhp,
399 		    sizeof (fdb_fanout_t) * (vnetp->nfdb_hash + 1));
400 	}
401 	if (attach_state & AST_vptl_alloc) {
402 		WRITE_ENTER(&vnetp->trwlock);
403 		vnet_del_vptl(vnetp, vp_tlp);
404 		RW_EXIT(&vnetp->trwlock);
405 	}
406 	if (attach_state & AST_vgen_init) {
407 		vgen_uninit(vgenmacp->m_driver);
408 	}
409 	if (attach_state & AST_mac_alloc) {
410 		KMEM_FREE(macp);
411 	}
412 	if (attach_state & AST_vnet_alloc) {
413 		KMEM_FREE(vnetp);
414 	}
415 	return (DDI_FAILURE);
416 }
417 
418 /*
419  * detach(9E): detach a device from the system.
420  */
421 static int
422 vnetdetach(dev_info_t *dip, ddi_detach_cmd_t cmd)
423 {
424 	vnet_t		*vnetp;
425 	vnet_t		**vnetpp;
426 	vp_tl_t		*vp_tlp;
427 	int		instance;
428 
429 	instance = ddi_get_instance(dip);
430 	DBG1((NULL, "vnetdetach: instance(%d) enter\n", instance));
431 
432 	vnetp = ddi_get_driver_private(dip);
433 	if (vnetp == NULL) {
434 		goto vnet_detach_fail;
435 	}
436 
437 	switch (cmd) {
438 	case DDI_DETACH:
439 		break;
440 	case DDI_SUSPEND:
441 	case DDI_PM_SUSPEND:
442 	default:
443 		goto vnet_detach_fail;
444 	}
445 
446 	/*
447 	 * Unregister from the MAC subsystem.  This can fail, in
448 	 * particular if there are DLPI style-2 streams still open -
449 	 * in which case we just return failure.
450 	 */
451 	if (mac_unregister(vnetp->macp) != 0)
452 		goto vnet_detach_fail;
453 
454 	/* unlink from instance(vnet_t) list */
455 	WRITE_ENTER(&vnet_rw);
456 	for (vnetpp = &vnet_headp; *vnetpp; vnetpp = &(*vnetpp)->nextp) {
457 		if (*vnetpp == vnetp) {
458 			*vnetpp = vnetp->nextp;
459 			break;
460 		}
461 	}
462 	RW_EXIT(&vnet_rw);
463 
464 	/* uninit and free vnet proxy transports */
465 	WRITE_ENTER(&vnetp->trwlock);
466 	while ((vp_tlp = vnetp->tlp) != NULL) {
467 		if (strcmp(vnetp->vgen_name, vp_tlp->name) == 0) {
468 			/* uninitialize generic transport */
469 			vgen_uninit(vp_tlp->macp->m_driver);
470 		}
471 		vnet_del_vptl(vnetp, vp_tlp);
472 	}
473 	RW_EXIT(&vnetp->trwlock);
474 
475 	KMEM_FREE(vnetp->macp);
476 	KMEM_FREE(vnetp);
477 
478 	return (DDI_SUCCESS);
479 
480 vnet_detach_fail:
481 	return (DDI_FAILURE);
482 }
483 
484 /* enable the device for transmit/receive */
485 static int
486 vnet_m_start(void *arg)
487 {
488 	vnet_t		*vnetp = arg;
489 	vp_tl_t		*vp_tlp;
490 	mac_t		*vp_macp;
491 
492 	DBG1((vnetp, "vnet_m_start: enter\n"));
493 
494 	/*
495 	 * XXX
496 	 * Currently, we only have generic transport. m_start() invokes
497 	 * vgen_start() which enables ports/channels in vgen and
498 	 * initiates handshake with peer vnets and vsw. In the future when we
499 	 * have support for hardware specific transports, this information
500 	 * needs to be propagted back to vnet from vgen and we need to revisit
501 	 * this code (see comments in vnet_attach()).
502 	 *
503 	 */
504 	WRITE_ENTER(&vnetp->trwlock);
505 	for (vp_tlp = vnetp->tlp; vp_tlp != NULL; vp_tlp = vp_tlp->nextp) {
506 		vp_macp = vp_tlp->macp;
507 		vp_macp->m_start(vp_macp->m_driver);
508 	}
509 	RW_EXIT(&vnetp->trwlock);
510 
511 	DBG1((vnetp, "vnet_m_start: exit\n"));
512 	return (VNET_SUCCESS);
513 
514 }
515 
516 /* stop transmit/receive for the device */
517 static void
518 vnet_m_stop(void *arg)
519 {
520 	vnet_t		*vnetp = arg;
521 	vp_tl_t		*vp_tlp;
522 	mac_t		*vp_macp;
523 
524 	DBG1((vnetp, "vnet_m_stop: enter\n"));
525 
526 	WRITE_ENTER(&vnetp->trwlock);
527 	for (vp_tlp = vnetp->tlp; vp_tlp != NULL; vp_tlp = vp_tlp->nextp) {
528 		vp_macp = vp_tlp->macp;
529 		vp_macp->m_stop(vp_macp->m_driver);
530 	}
531 	RW_EXIT(&vnetp->trwlock);
532 
533 	DBG1((vnetp, "vnet_m_stop: exit\n"));
534 }
535 
536 /* set the unicast mac address of the device */
537 static int
538 vnet_m_unicst(void *arg, const uint8_t *macaddr)
539 {
540 	_NOTE(ARGUNUSED(macaddr))
541 
542 	vnet_t *vnetp = arg;
543 
544 	DBG1((vnetp, "vnet_m_unicst: enter\n"));
545 	/*
546 	 * XXX: setting mac address dynamically is not supported.
547 	 */
548 #if 0
549 	bcopy(macaddr, vnetp->curr_macaddr, ETHERADDRL);
550 #endif
551 	DBG1((vnetp, "vnet_m_unicst: exit\n"));
552 
553 	return (VNET_SUCCESS);
554 }
555 
556 /* enable/disable a multicast address */
557 static int
558 vnet_m_multicst(void *arg, boolean_t add, const uint8_t *mca)
559 {
560 	_NOTE(ARGUNUSED(add, mca))
561 
562 	vnet_t *vnetp = arg;
563 	vp_tl_t		*vp_tlp;
564 	mac_t		*vp_macp;
565 	int rv = VNET_SUCCESS;
566 
567 	DBG1((vnetp, "vnet_m_multicst: enter\n"));
568 	READ_ENTER(&vnetp->trwlock);
569 	for (vp_tlp = vnetp->tlp; vp_tlp != NULL; vp_tlp = vp_tlp->nextp) {
570 		if (strcmp(vnetp->vgen_name, vp_tlp->name) == 0) {
571 			vp_macp = vp_tlp->macp;
572 			rv = vp_macp->m_multicst(vp_macp->m_driver, add, mca);
573 			break;
574 		}
575 	}
576 	RW_EXIT(&vnetp->trwlock);
577 	DBG1((vnetp, "vnet_m_multicst: exit\n"));
578 	return (rv);
579 }
580 
581 /* set or clear promiscuous mode on the device */
582 static int
583 vnet_m_promisc(void *arg, boolean_t on)
584 {
585 	_NOTE(ARGUNUSED(on))
586 
587 	vnet_t *vnetp = arg;
588 	DBG1((vnetp, "vnet_m_promisc: enter\n"));
589 	/*
590 	 * XXX: setting promiscuous mode is not supported, just return success.
591 	 */
592 	DBG1((vnetp, "vnet_m_promisc: exit\n"));
593 	return (VNET_SUCCESS);
594 }
595 
596 /*
597  * Transmit a chain of packets. This function provides switching functionality
598  * based on the destination mac address to reach other guests (within ldoms) or
599  * external hosts.
600  */
601 mblk_t *
602 vnet_m_tx(void *arg, mblk_t *mp)
603 {
604 	vnet_t *vnetp;
605 	mblk_t *next;
606 	uint32_t fdbhash;
607 	fdb_t *fdbp;
608 	fdb_fanout_t *fdbhp;
609 	struct ether_header *ehp;
610 	uint8_t *macaddr;
611 	mblk_t *resid_mp;
612 
613 	vnetp = (vnet_t *)arg;
614 	DBG1((vnetp, "vnet_m_tx: enter\n"));
615 	ASSERT(mp != NULL);
616 
617 	while (mp != NULL) {
618 		next = mp->b_next;
619 		mp->b_next = NULL;
620 
621 		/* get the destination mac address in the eth header */
622 		ehp = (struct ether_header *)mp->b_rptr;
623 		macaddr = (uint8_t *)&ehp->ether_dhost;
624 
625 		/* Calculate hash value and fdb fanout */
626 		fdbhash = MACHASH(macaddr, vnetp->nfdb_hash);
627 		fdbhp = &(vnetp->fdbhp[fdbhash]);
628 
629 		READ_ENTER(&fdbhp->rwlock);
630 		fdbp = vnet_lookup_fdb(fdbhp, macaddr);
631 		if (fdbp) {
632 			/*
633 			 * If the destination is in FDB, the destination is
634 			 * a vnet device within ldoms and directly reachable,
635 			 * invoke the tx function in the fdb entry.
636 			 */
637 			resid_mp = fdbp->m_tx(fdbp->txarg, mp);
638 			if (resid_mp != NULL) {
639 				/* m_tx failed */
640 				mp->b_next = next;
641 				RW_EXIT(&fdbhp->rwlock);
642 				break;
643 			}
644 			RW_EXIT(&fdbhp->rwlock);
645 		} else {
646 			/* destination is not in FDB */
647 			RW_EXIT(&fdbhp->rwlock);
648 			/*
649 			 * If the destination is broadcast/multicast
650 			 * or an unknown unicast address, forward the
651 			 * packet to vsw, using the last slot in fdb which is
652 			 * reserved for default route.
653 			 */
654 			fdbhp = &(vnetp->fdbhp[vnetp->nfdb_hash]);
655 			READ_ENTER(&fdbhp->rwlock);
656 			fdbp = fdbhp->headp;
657 			if (fdbp) {
658 				resid_mp = fdbp->m_tx(fdbp->txarg, mp);
659 				if (resid_mp != NULL) {
660 					/* m_tx failed */
661 					mp->b_next = next;
662 					RW_EXIT(&fdbhp->rwlock);
663 					break;
664 				}
665 			} else {
666 				/* drop the packet */
667 				freemsg(mp);
668 			}
669 			RW_EXIT(&fdbhp->rwlock);
670 		}
671 
672 		mp = next;
673 	}
674 
675 	DBG1((vnetp, "vnet_m_tx: exit\n"));
676 	return (mp);
677 }
678 
679 /* register resources with mac layer */
680 static void
681 vnet_m_resources(void *arg)
682 {
683 	vnet_t *vnetp = arg;
684 	vp_tl_t	*vp_tlp;
685 	mac_t	*vp_macp;
686 
687 	DBG1((vnetp, "vnet_m_resources: enter\n"));
688 
689 	WRITE_ENTER(&vnetp->trwlock);
690 	for (vp_tlp = vnetp->tlp; vp_tlp != NULL; vp_tlp = vp_tlp->nextp) {
691 		vp_macp = vp_tlp->macp;
692 		vp_macp->m_resources(vp_macp->m_driver);
693 	}
694 	RW_EXIT(&vnetp->trwlock);
695 
696 	DBG1((vnetp, "vnet_m_resources: exit\n"));
697 }
698 
699 /*
700  * vnet specific ioctls
701  */
702 static void
703 vnet_m_ioctl(void *arg, queue_t *wq, mblk_t *mp)
704 {
705 	vnet_t *vnetp = (vnet_t *)arg;
706 	struct iocblk *iocp;
707 	int cmd;
708 
709 	DBG1((vnetp, "vnet_m_ioctl: enter\n"));
710 
711 	iocp = (struct iocblk *)mp->b_rptr;
712 	iocp->ioc_error = 0;
713 	cmd = iocp->ioc_cmd;
714 	switch (cmd) {
715 	default:
716 		miocnak(wq, mp, 0, EINVAL);
717 		break;
718 	}
719 	DBG1((vnetp, "vnet_m_ioctl: exit\n"));
720 }
721 
722 /* get statistics from the device */
723 uint64_t
724 vnet_m_stat(void *arg, enum mac_stat stat)
725 {
726 	vnet_t *vnetp = arg;
727 	vp_tl_t	*vp_tlp;
728 	mac_t	*vp_macp;
729 	uint64_t val = 0;
730 
731 	DBG1((vnetp, "vnet_m_stat: enter\n"));
732 
733 	/*
734 	 * get the specified statistic from each transport
735 	 * and return the aggregate val
736 	 */
737 	READ_ENTER(&vnetp->trwlock);
738 	for (vp_tlp = vnetp->tlp; vp_tlp != NULL; vp_tlp = vp_tlp->nextp) {
739 		vp_macp = vp_tlp->macp;
740 		val += vp_macp->m_stat(vp_macp->m_driver, stat);
741 	}
742 	RW_EXIT(&vnetp->trwlock);
743 
744 	DBG1((vnetp, "vnet_m_stat: exit\n"));
745 	return (val);
746 }
747 
748 /* wrapper function for mac_register() */
749 static int
750 vnet_mac_register(vnet_t *vnetp)
751 {
752 	mac_info_t *mip;
753 	mac_t *macp;
754 
755 	macp = vnetp->macp;
756 
757 	mip = &(macp->m_info);
758 	mip->mi_media = DL_ETHER;
759 	mip->mi_sdu_min = 0;
760 	mip->mi_sdu_max = ETHERMTU;
761 	mip->mi_cksum = 0;
762 	mip->mi_poll = 0; /* DL_CAPAB_POLL ? */
763 	mip->mi_addr_length = ETHERADDRL;
764 	bcopy(&etherbroadcastaddr, mip->mi_brdcst_addr, ETHERADDRL);
765 	bcopy(vnetp->curr_macaddr, mip->mi_unicst_addr, ETHERADDRL);
766 
767 	MAC_STAT_MIB(mip->mi_stat);
768 	mip->mi_stat[MAC_STAT_UNKNOWNS] = B_FALSE;
769 	MAC_STAT_ETHER(mip->mi_stat);
770 	mip->mi_stat[MAC_STAT_SQE_ERRORS] = B_FALSE;
771 	mip->mi_stat[MAC_STAT_MACRCV_ERRORS] = B_FALSE;
772 
773 	macp->m_stat = vnet_m_stat;
774 	macp->m_start = vnet_m_start;
775 	macp->m_stop = vnet_m_stop;
776 	macp->m_promisc = vnet_m_promisc;
777 	macp->m_multicst = vnet_m_multicst;
778 	macp->m_unicst = vnet_m_unicst;
779 	macp->m_resources = vnet_m_resources;
780 	macp->m_ioctl = vnet_m_ioctl;
781 	macp->m_tx = vnet_m_tx;
782 
783 	macp->m_dip = vnetp->dip;
784 	macp->m_ident = MAC_IDENT;
785 
786 	/*
787 	 * Finally, we're ready to register ourselves with the MAC layer
788 	 * interface; if this succeeds, we're all ready to start()
789 	 */
790 	if (mac_register(macp) != 0) {
791 		KMEM_FREE(macp);
792 		return (DDI_FAILURE);
793 	}
794 
795 	return (DDI_SUCCESS);
796 }
797 
798 /* add vp_tl to the list */
799 static void
800 vnet_add_vptl(vnet_t *vnetp, vp_tl_t *vp_tlp)
801 {
802 	vp_tl_t *ttlp;
803 
804 	WRITE_ENTER(&vnetp->trwlock);
805 	if (vnetp->tlp == NULL) {
806 		vnetp->tlp = vp_tlp;
807 	} else {
808 		ttlp = vnetp->tlp;
809 		while (ttlp->nextp)
810 			ttlp = ttlp->nextp;
811 		ttlp->nextp = vp_tlp;
812 	}
813 	RW_EXIT(&vnetp->trwlock);
814 }
815 
816 /* remove vp_tl from the list */
817 static void
818 vnet_del_vptl(vnet_t *vnetp, vp_tl_t *vp_tlp)
819 {
820 	vp_tl_t *ttlp, **pretlp;
821 	boolean_t found = B_FALSE;
822 
823 	pretlp = &vnetp->tlp;
824 	ttlp = *pretlp;
825 	while (ttlp) {
826 		if (ttlp == vp_tlp) {
827 			found = B_TRUE;
828 			(*pretlp) = ttlp->nextp;
829 			ttlp->nextp = NULL;
830 			break;
831 		}
832 		pretlp = &(ttlp->nextp);
833 		ttlp = *pretlp;
834 	}
835 
836 	if (found) {
837 		KMEM_FREE(vp_tlp);
838 	}
839 }
840 
841 /* get vp_tl corresponding to the given name */
842 static vp_tl_t *
843 vnet_get_vptl(vnet_t *vnetp, const char *name)
844 {
845 	vp_tl_t *tlp;
846 
847 	tlp = vnetp->tlp;
848 	while (tlp) {
849 		if (strcmp(tlp->name, name) == 0) {
850 			return (tlp);
851 		}
852 		tlp = tlp->nextp;
853 	}
854 	DWARN((vnetp,
855 	    "vnet_get_vptl: can't find vp_tl with name (%s)\n", name));
856 	return (NULL);
857 }
858 
859 /* read the mac address of the device */
860 static int
861 vnet_read_mac_address(vnet_t *vnetp)
862 {
863 	uchar_t 	*macaddr;
864 	uint32_t 	size;
865 	int 		rv;
866 
867 	rv = ddi_prop_lookup_byte_array(DDI_DEV_T_ANY, vnetp->dip,
868 		DDI_PROP_DONTPASS, macaddr_propname, &macaddr, &size);
869 	if ((rv != DDI_PROP_SUCCESS) || (size != ETHERADDRL)) {
870 		DWARN((vnetp,
871 		"vnet_read_mac_address: prop_lookup failed (%s) err (%d)\n",
872 		macaddr_propname, rv));
873 		return (DDI_FAILURE);
874 	}
875 	bcopy(macaddr, (caddr_t)vnetp->vendor_addr, ETHERADDRL);
876 	bcopy(macaddr, (caddr_t)vnetp->curr_macaddr, ETHERADDRL);
877 	ddi_prop_free(macaddr);
878 
879 	return (DDI_SUCCESS);
880 }
881 
882 
883 /*
884  * Functions below are called only by generic transport to add/remove/modify
885  * entries in forwarding database. See comments in vgen_port_init(vnet_gen.c).
886  */
887 
888 /* add an entry into the forwarding database */
889 void
890 vnet_add_fdb(void *arg, uint8_t *macaddr, mac_tx_t m_tx, void *txarg)
891 {
892 	vnet_t *vnetp = (vnet_t *)arg;
893 	uint32_t fdbhash;
894 	fdb_t *fdbp;
895 	fdb_fanout_t *fdbhp;
896 
897 	/* Calculate hash value and fdb fanout */
898 	fdbhash = MACHASH(macaddr, vnetp->nfdb_hash);
899 	fdbhp = &(vnetp->fdbhp[fdbhash]);
900 
901 	WRITE_ENTER(&fdbhp->rwlock);
902 
903 	fdbp = kmem_zalloc(sizeof (fdb_t), KM_NOSLEEP);
904 	if (fdbp == NULL) {
905 		RW_EXIT(&fdbhp->rwlock);
906 		return;
907 	}
908 	bcopy(macaddr, (caddr_t)fdbp->macaddr, ETHERADDRL);
909 	fdbp->m_tx = m_tx;
910 	fdbp->txarg = txarg;
911 	fdbp->nextp = fdbhp->headp;
912 	fdbhp->headp = fdbp;
913 
914 	RW_EXIT(&fdbhp->rwlock);
915 }
916 
917 /* delete an entry from the forwarding database */
918 void
919 vnet_del_fdb(void *arg, uint8_t *macaddr)
920 {
921 	vnet_t *vnetp = (vnet_t *)arg;
922 	uint32_t fdbhash;
923 	fdb_t *fdbp;
924 	fdb_t **pfdbp;
925 	fdb_fanout_t *fdbhp;
926 
927 	/* Calculate hash value and fdb fanout */
928 	fdbhash = MACHASH(macaddr, vnetp->nfdb_hash);
929 	fdbhp = &(vnetp->fdbhp[fdbhash]);
930 
931 	WRITE_ENTER(&fdbhp->rwlock);
932 
933 	for (pfdbp = &fdbhp->headp; (fdbp  = *pfdbp) != NULL;
934 	    pfdbp = &fdbp->nextp) {
935 		if (bcmp(fdbp->macaddr, macaddr, ETHERADDRL) == 0) {
936 			/* Unlink it from the list */
937 			*pfdbp = fdbp->nextp;
938 			KMEM_FREE(fdbp);
939 			break;
940 		}
941 	}
942 
943 	RW_EXIT(&fdbhp->rwlock);
944 }
945 
946 /* modify an existing entry in the forwarding database */
947 void
948 vnet_modify_fdb(void *arg, uint8_t *macaddr, mac_tx_t m_tx, void *txarg)
949 {
950 	vnet_t *vnetp = (vnet_t *)arg;
951 	uint32_t fdbhash;
952 	fdb_t *fdbp;
953 	fdb_fanout_t *fdbhp;
954 
955 	/* Calculate hash value and fdb fanout */
956 	fdbhash = MACHASH(macaddr, vnetp->nfdb_hash);
957 	fdbhp = &(vnetp->fdbhp[fdbhash]);
958 
959 	WRITE_ENTER(&fdbhp->rwlock);
960 
961 	for (fdbp = fdbhp->headp; fdbp != NULL; fdbp = fdbp->nextp) {
962 		if (bcmp(fdbp->macaddr, macaddr, ETHERADDRL) == 0) {
963 			/* change the entry to have new tx params */
964 			fdbp->m_tx = m_tx;
965 			fdbp->txarg = txarg;
966 			break;
967 		}
968 	}
969 
970 	RW_EXIT(&fdbhp->rwlock);
971 }
972 
973 /* look up an fdb entry based on the mac address, caller holds lock */
974 static fdb_t *
975 vnet_lookup_fdb(fdb_fanout_t *fdbhp, uint8_t *macaddr)
976 {
977 	fdb_t *fdbp = NULL;
978 
979 	for (fdbp = fdbhp->headp; fdbp != NULL; fdbp = fdbp->nextp) {
980 		if (bcmp(fdbp->macaddr, macaddr, ETHERADDRL) == 0) {
981 			break;
982 		}
983 	}
984 
985 	return (fdbp);
986 }
987 
988 /* add default route entry into the forwarding database */
989 void
990 vnet_add_def_rte(void *arg, mac_tx_t m_tx, void *txarg)
991 {
992 	vnet_t *vnetp = (vnet_t *)arg;
993 	fdb_t *fdbp;
994 	fdb_fanout_t *fdbhp;
995 
996 	/*
997 	 * The last hash list is reserved for default route entry,
998 	 * and for now, we have only one entry in this list.
999 	 */
1000 	fdbhp = &(vnetp->fdbhp[vnetp->nfdb_hash]);
1001 
1002 	WRITE_ENTER(&fdbhp->rwlock);
1003 
1004 	if (fdbhp->headp) {
1005 		DWARN((vnetp,
1006 		    "vnet_add_def_rte: default rte already exists\n"));
1007 		RW_EXIT(&fdbhp->rwlock);
1008 		return;
1009 	}
1010 	fdbp = kmem_zalloc(sizeof (fdb_t), KM_NOSLEEP);
1011 	if (fdbp == NULL) {
1012 		RW_EXIT(&fdbhp->rwlock);
1013 		return;
1014 	}
1015 	bzero(fdbp->macaddr, ETHERADDRL);
1016 	fdbp->m_tx = m_tx;
1017 	fdbp->txarg = txarg;
1018 	fdbp->nextp = NULL;
1019 	fdbhp->headp = fdbp;
1020 
1021 	RW_EXIT(&fdbhp->rwlock);
1022 }
1023 
1024 /* delete default route entry from the forwarding database */
1025 void
1026 vnet_del_def_rte(void *arg)
1027 {
1028 	vnet_t *vnetp = (vnet_t *)arg;
1029 	fdb_t *fdbp;
1030 	fdb_fanout_t *fdbhp;
1031 
1032 	/*
1033 	 * The last hash list is reserved for default route entry,
1034 	 * and for now, we have only one entry in this list.
1035 	 */
1036 	fdbhp = &(vnetp->fdbhp[vnetp->nfdb_hash]);
1037 
1038 	WRITE_ENTER(&fdbhp->rwlock);
1039 
1040 	if (fdbhp->headp == NULL) {
1041 		RW_EXIT(&fdbhp->rwlock);
1042 		return;
1043 	}
1044 	fdbp = fdbhp->headp;
1045 	KMEM_FREE(fdbp);
1046 	fdbhp->headp = NULL;
1047 
1048 	RW_EXIT(&fdbhp->rwlock);
1049 }
1050