xref: /illumos-gate/usr/src/uts/sun4v/io/vnet.c (revision e7cbe64f7a72dae5cb44f100db60ca88f3313c65)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <sys/types.h>
30 #include <sys/errno.h>
31 #include <sys/param.h>
32 #include <sys/stream.h>
33 #include <sys/kmem.h>
34 #include <sys/conf.h>
35 #include <sys/devops.h>
36 #include <sys/ksynch.h>
37 #include <sys/stat.h>
38 #include <sys/modctl.h>
39 #include <sys/modhash.h>
40 #include <sys/debug.h>
41 #include <sys/ethernet.h>
42 #include <sys/dlpi.h>
43 #include <net/if.h>
44 #include <sys/mac.h>
45 #include <sys/mac_ether.h>
46 #include <sys/ddi.h>
47 #include <sys/sunddi.h>
48 #include <sys/strsun.h>
49 #include <sys/note.h>
50 #include <sys/atomic.h>
51 #include <sys/vnet.h>
52 #include <sys/vlan.h>
53 
54 /*
55  * Function prototypes.
56  */
57 
58 /* DDI entrypoints */
59 static int vnetdevinfo(dev_info_t *, ddi_info_cmd_t, void *, void **);
60 static int vnetattach(dev_info_t *, ddi_attach_cmd_t);
61 static int vnetdetach(dev_info_t *, ddi_detach_cmd_t);
62 
63 /* MAC entrypoints  */
64 static int vnet_m_stat(void *, uint_t, uint64_t *);
65 static int vnet_m_start(void *);
66 static void vnet_m_stop(void *);
67 static int vnet_m_promisc(void *, boolean_t);
68 static int vnet_m_multicst(void *, boolean_t, const uint8_t *);
69 static int vnet_m_unicst(void *, const uint8_t *);
70 mblk_t *vnet_m_tx(void *, mblk_t *);
71 
72 /* vnet internal functions */
73 static int vnet_mac_register(vnet_t *);
74 static int vnet_read_mac_address(vnet_t *vnetp);
75 static void vnet_add_vptl(vnet_t *vnetp, vp_tl_t *vp_tlp);
76 static void vnet_del_vptl(vnet_t *vnetp, vp_tl_t *vp_tlp);
77 static vp_tl_t *vnet_get_vptl(vnet_t *vnetp, const char *devname);
78 
79 /* Forwarding database (FDB) routines */
80 static void vnet_fdb_create(vnet_t *vnetp);
81 static void vnet_fdb_destroy(vnet_t *vnetp);
82 static vnet_fdbe_t *vnet_fdbe_find(vnet_t *vnetp, struct ether_addr *eaddr);
83 static void vnet_fdbe_find_cb(mod_hash_key_t key, mod_hash_val_t val);
84 void vnet_fdbe_add(vnet_t *vnetp, struct ether_addr *macaddr,
85 	uint8_t type, mac_tx_t m_tx, void *port);
86 void vnet_fdbe_del(vnet_t *vnetp, struct ether_addr *eaddr);
87 void vnet_fdbe_modify(vnet_t *vnetp, struct ether_addr *macaddr,
88 	void *portp, boolean_t flag);
89 
90 void vnet_rx(void *arg, mac_resource_handle_t mrh, mblk_t *mp);
91 void vnet_tx_update(void *arg);
92 
93 /* externs */
94 extern int vgen_init(vnet_t *vnetp, dev_info_t *vnetdip, const uint8_t *macaddr,
95 	mac_register_t **vgenmacp);
96 extern int vgen_uninit(void *arg);
97 
/*
 * Take a reference on fdb entry (p): atomically increment its refcnt and
 * assert that the counter did not wrap to zero.
 *
 * Wrapped in do { } while (0) so the macro expands to a single statement
 * and is safe to use in an unbraced if/else.
 */
#define	VNET_FDBE_REFHOLD(p)						\
do {									\
	atomic_inc_32(&(p)->refcnt);					\
	ASSERT((p)->refcnt != 0);					\
} while (0)

/*
 * Drop a reference on fdb entry (p): assert that a reference is actually
 * held, then atomically decrement its refcnt.
 */
#define	VNET_FDBE_REFRELE(p)						\
do {									\
	ASSERT((p)->refcnt != 0);					\
	atomic_dec_32(&(p)->refcnt);					\
} while (0)
109 
/*
 * MAC callback vector for this driver; installed as m_callbacks by
 * vnet_mac_register(). The initializer is positional, so the order of
 * entries must match the mac_callbacks_t layout.
 */
static mac_callbacks_t vnet_m_callbacks = {
	0,			/* presumably the optional-callback flags */
				/* (none set) - confirm vs mac_callbacks_t */
	vnet_m_stat,		/* get statistics */
	vnet_m_start,		/* enable transmit/receive */
	vnet_m_stop,		/* stop transmit/receive */
	vnet_m_promisc,		/* set/clear promiscuous mode */
	vnet_m_multicst,	/* enable/disable a multicast address */
	vnet_m_unicst,		/* set unicast address */
	vnet_m_tx,		/* transmit a chain of packets */
	NULL,			/* remaining callbacks are not provided */
	NULL,
	NULL
};
123 
/*
 * Linked list of "vnet_t" structures - one per instance.
 * Instances are pushed onto the head in vnetattach() and unlinked in
 * vnetdetach(); both do so while holding vnet_rw as writer.
 */
static vnet_t	*vnet_headp = NULL;
static krwlock_t vnet_rw;

/*
 * Tunables
 * None of the four values below is referenced elsewhere in this file;
 * presumably they are consumed by the transport layer (confirm).
 */
uint32_t vnet_ntxds = VNET_NTXDS;	/* power of 2 transmit descriptors */
uint32_t vnet_ldcwd_interval = VNET_LDCWD_INTERVAL; /* watchdog freq in msec */
uint32_t vnet_ldcwd_txtimeout = VNET_LDCWD_TXTIMEOUT;  /* tx timeout in msec */
uint32_t vnet_ldc_mtu = VNET_LDC_MTU;		/* ldc mtu */

/* # of chains in fdb hash table; read by vnet_fdb_create() */
uint32_t	vnet_fdb_nchains = VNET_NFDB_HASH;

/* Internal tunables */
/* registered with the MAC layer as m_max_sdu in vnet_mac_register() */
uint32_t	vnet_ethermtu = 1500;	/* mtu of the device */

/*
 * Default vlan id. This is only used internally when the "default-vlan-id"
 * property is not present in the MD device node. Therefore, this should not be
 * used as a tunable; if this value is changed, the corresponding variable
 * should be updated to the same value in vsw and also other vnets connected to
 * the same vsw.
 */
uint16_t	vnet_default_vlan_id = 1;

/*
 * delay in usec to wait for all references on a fdb entry to be dropped;
 * used as the polling interval in vnet_fdbe_del() and vnet_fdbe_modify().
 */
uint32_t vnet_fdbe_refcnt_delay = 10;

/*
 * Property names
 */
/* looked up on the device node by vnet_read_mac_address() */
static char macaddr_propname[] = "local-mac-address";

/*
 * This is the string displayed by modinfo(1m).
 */
static char vnet_ident[] = "vnet driver v%I%";
/* Module-type ops for device drivers, referenced by modldrv below. */
extern struct mod_ops mod_driverops;

/*
 * Character/block entry points. Only open and close succeed (nulldev);
 * every other entry is a stub (nodev/nochpoll), apart from the
 * property operation which uses ddi_prop_op.
 */
static struct cb_ops cb_vnetops = {
	nulldev,		/* cb_open */
	nulldev,		/* cb_close */
	nodev,			/* cb_strategy */
	nodev,			/* cb_print */
	nodev,			/* cb_dump */
	nodev,			/* cb_read */
	nodev,			/* cb_write */
	nodev,			/* cb_ioctl */
	nodev,			/* cb_devmap */
	nodev,			/* cb_mmap */
	nodev,			/* cb_segmap */
	nochpoll,		/* cb_chpoll */
	ddi_prop_op,		/* cb_prop_op */
	NULL,			/* cb_stream */
	(int)(D_MP)		/* cb_flag */
};

/*
 * Device operations. This structure is also handed to mac_init_ops() in
 * _init() and mac_fini_ops() in _init()/_fini().
 */
static struct dev_ops vnetops = {
	DEVO_REV,		/* devo_rev */
	0,			/* devo_refcnt */
	NULL,			/* devo_getinfo */
	nulldev,		/* devo_identify */
	nulldev,		/* devo_probe */
	vnetattach,		/* devo_attach */
	vnetdetach,		/* devo_detach */
	nodev,			/* devo_reset */
	&cb_vnetops,		/* devo_cb_ops */
	(struct bus_ops *)NULL	/* devo_bus_ops */
};

/* Driver linkage: ties the ident string and dev_ops to the module type. */
static struct modldrv modldrv = {
	&mod_driverops,		/* Type of module.  This one is a driver */
	vnet_ident,		/* ID string */
	&vnetops		/* driver specific ops */
};

/* Module linkage passed to mod_install(), mod_remove() and mod_info(). */
static struct modlinkage modlinkage = {
	MODREV_1, (void *)&modldrv, NULL
};
204 
#ifdef DEBUG

/*
 * Print debug messages - set to 0xf to enable all msgs
 */
int vnet_dbglevel = 0x8;

/*
 * Format a debug message and emit it through cmn_err(CE_CONT).
 *
 * fname - name of the calling function, used as a prefix.
 * arg   - vnet_t pointer of the instance, or NULL; when non-NULL the
 *         prefix also carries "vnet<instance>:".
 * fmt   - printf-style format followed by its arguments.
 *
 * All formatting is bounded by the size of the local buffer; a message
 * that does not fit is truncated rather than overrunning the stack.
 */
static void
debug_printf(const char *fname, void *arg, const char *fmt, ...)
{
	char	buf[512];
	va_list	ap;
	vnet_t	*vnetp = (vnet_t *)arg;
	size_t	len;

	if (vnetp == NULL) {
		(void) snprintf(buf, sizeof (buf), "%s: ", fname);
	} else {
		(void) snprintf(buf, sizeof (buf), "vnet%d:%s: ",
		    vnetp->instance, fname);
	}

	/* snprintf always terminates, so len is at most sizeof (buf) - 1 */
	len = strlen(buf);

	va_start(ap, fmt);
	(void) vsnprintf(buf + len, sizeof (buf) - len, fmt, ap);
	va_end(ap);
	cmn_err(CE_CONT, "%s\n", buf);
}

#endif
234 
235 /* _init(9E): initialize the loadable module */
236 int
237 _init(void)
238 {
239 	int status;
240 
241 	DBG1(NULL, "enter\n");
242 
243 	mac_init_ops(&vnetops, "vnet");
244 	status = mod_install(&modlinkage);
245 	if (status != 0) {
246 		mac_fini_ops(&vnetops);
247 	}
248 
249 	DBG1(NULL, "exit(%d)\n", status);
250 	return (status);
251 }
252 
253 /* _fini(9E): prepare the module for unloading. */
254 int
255 _fini(void)
256 {
257 	int status;
258 
259 	DBG1(NULL, "enter\n");
260 
261 	status = mod_remove(&modlinkage);
262 	if (status != 0)
263 		return (status);
264 	mac_fini_ops(&vnetops);
265 
266 	DBG1(NULL, "exit(%d)\n", status);
267 	return (status);
268 }
269 
270 /* _info(9E): return information about the loadable module */
271 int
272 _info(struct modinfo *modinfop)
273 {
274 	return (mod_info(&modlinkage, modinfop));
275 }
276 
277 /*
278  * attach(9E): attach a device to the system.
279  * called once for each instance of the device on the system.
280  */
281 static int
282 vnetattach(dev_info_t *dip, ddi_attach_cmd_t cmd)
283 {
284 	vnet_t		*vnetp;
285 	vp_tl_t		*vp_tlp;
286 	int		instance;
287 	int		status;
288 	mac_register_t	*vgenmacp = NULL;
289 	enum	{ AST_init = 0x0, AST_vnet_alloc = 0x1,
290 		AST_mac_alloc = 0x2, AST_read_macaddr = 0x4,
291 		AST_vgen_init = 0x8, AST_vptl_alloc = 0x10,
292 		AST_fdbh_alloc = 0x20 } attach_state;
293 
294 	attach_state = AST_init;
295 
296 	switch (cmd) {
297 	case DDI_ATTACH:
298 		break;
299 	case DDI_RESUME:
300 	case DDI_PM_RESUME:
301 	default:
302 		goto vnet_attach_fail;
303 	}
304 
305 	instance = ddi_get_instance(dip);
306 	DBG1(NULL, "instance(%d) enter\n", instance);
307 
308 	/* allocate vnet_t and mac_t structures */
309 	vnetp = kmem_zalloc(sizeof (vnet_t), KM_SLEEP);
310 	attach_state |= AST_vnet_alloc;
311 
312 	/* setup links to vnet_t from both devinfo and mac_t */
313 	ddi_set_driver_private(dip, (caddr_t)vnetp);
314 	vnetp->dip = dip;
315 	vnetp->instance = instance;
316 
317 	/* read the mac address */
318 	status = vnet_read_mac_address(vnetp);
319 	if (status != DDI_SUCCESS) {
320 		goto vnet_attach_fail;
321 	}
322 	attach_state |= AST_read_macaddr;
323 
324 	/*
325 	 * Initialize the generic vnet proxy transport. This is the first
326 	 * and default transport used by vnet. The generic transport
327 	 * is provided by using sun4v LDC (logical domain channel). On success,
328 	 * vgen_init() provides a pointer to mac_t of generic transport.
329 	 * Currently, this generic layer provides network connectivity to other
330 	 * vnets within ldoms and also to remote hosts oustide ldoms through
331 	 * the virtual switch (vsw) device on domain0. In the future, when
332 	 * physical adapters that are able to share their resources (such as
333 	 * dma channels) with guest domains become available, the vnet device
334 	 * will use hardware specific driver to communicate directly over the
335 	 * physical device to reach remote hosts without going through vswitch.
336 	 */
337 	status = vgen_init(vnetp, vnetp->dip, (uint8_t *)vnetp->curr_macaddr,
338 	    &vgenmacp);
339 	if (status != DDI_SUCCESS) {
340 		DERR(vnetp, "vgen_init() failed\n");
341 		goto vnet_attach_fail;
342 	}
343 	rw_init(&vnetp->trwlock, NULL, RW_DRIVER, NULL);
344 	attach_state |= AST_vgen_init;
345 
346 	vp_tlp = kmem_zalloc(sizeof (vp_tl_t), KM_SLEEP);
347 	vp_tlp->macp = vgenmacp;
348 	(void) snprintf(vp_tlp->name, MAXNAMELEN, "%s%u", "vgen", instance);
349 	(void) strcpy(vnetp->vgen_name, vp_tlp->name);
350 
351 	/* add generic transport to the list of vnet proxy transports */
352 	vnet_add_vptl(vnetp, vp_tlp);
353 	attach_state |= AST_vptl_alloc;
354 
355 	vnet_fdb_create(vnetp);
356 	attach_state |= AST_fdbh_alloc;
357 
358 	/* register with MAC layer */
359 	status = vnet_mac_register(vnetp);
360 	if (status != DDI_SUCCESS) {
361 		goto vnet_attach_fail;
362 	}
363 
364 	/* add to the list of vnet devices */
365 	WRITE_ENTER(&vnet_rw);
366 	vnetp->nextp = vnet_headp;
367 	vnet_headp = vnetp;
368 	RW_EXIT(&vnet_rw);
369 
370 	DBG1(NULL, "instance(%d) exit\n", instance);
371 	return (DDI_SUCCESS);
372 
373 vnet_attach_fail:
374 	if (attach_state & AST_fdbh_alloc) {
375 		vnet_fdb_destroy(vnetp);
376 	}
377 	if (attach_state & AST_vptl_alloc) {
378 		WRITE_ENTER(&vnetp->trwlock);
379 		vnet_del_vptl(vnetp, vp_tlp);
380 		RW_EXIT(&vnetp->trwlock);
381 	}
382 	if (attach_state & AST_vgen_init) {
383 		(void) vgen_uninit(vgenmacp->m_driver);
384 		rw_destroy(&vnetp->trwlock);
385 	}
386 	if (attach_state & AST_vnet_alloc) {
387 		KMEM_FREE(vnetp);
388 	}
389 	return (DDI_FAILURE);
390 }
391 
392 /*
393  * detach(9E): detach a device from the system.
394  */
395 static int
396 vnetdetach(dev_info_t *dip, ddi_detach_cmd_t cmd)
397 {
398 	vnet_t		*vnetp;
399 	vnet_t		**vnetpp;
400 	vp_tl_t		*vp_tlp;
401 	int		instance;
402 	int		rv;
403 
404 	instance = ddi_get_instance(dip);
405 	DBG1(NULL, "instance(%d) enter\n", instance);
406 
407 	vnetp = ddi_get_driver_private(dip);
408 	if (vnetp == NULL) {
409 		goto vnet_detach_fail;
410 	}
411 
412 	switch (cmd) {
413 	case DDI_DETACH:
414 		break;
415 	case DDI_SUSPEND:
416 	case DDI_PM_SUSPEND:
417 	default:
418 		goto vnet_detach_fail;
419 	}
420 
421 	/* uninit and free vnet proxy transports */
422 	WRITE_ENTER(&vnetp->trwlock);
423 	while ((vp_tlp = vnetp->tlp) != NULL) {
424 		if (strcmp(vnetp->vgen_name, vp_tlp->name) == 0) {
425 			/* uninitialize generic transport */
426 			rv = vgen_uninit(vp_tlp->macp->m_driver);
427 			if (rv != DDI_SUCCESS) {
428 				RW_EXIT(&vnetp->trwlock);
429 				goto vnet_detach_fail;
430 			}
431 		}
432 		vnet_del_vptl(vnetp, vp_tlp);
433 	}
434 	RW_EXIT(&vnetp->trwlock);
435 
436 	/*
437 	 * Unregister from the MAC subsystem.  This can fail, in
438 	 * particular if there are DLPI style-2 streams still open -
439 	 * in which case we just return failure.
440 	 */
441 	if (mac_unregister(vnetp->mh) != 0)
442 		goto vnet_detach_fail;
443 
444 	/* unlink from instance(vnet_t) list */
445 	WRITE_ENTER(&vnet_rw);
446 	for (vnetpp = &vnet_headp; *vnetpp; vnetpp = &(*vnetpp)->nextp) {
447 		if (*vnetpp == vnetp) {
448 			*vnetpp = vnetp->nextp;
449 			break;
450 		}
451 	}
452 	RW_EXIT(&vnet_rw);
453 
454 	/* destroy fdb */
455 	vnet_fdb_destroy(vnetp);
456 
457 	rw_destroy(&vnetp->trwlock);
458 	KMEM_FREE(vnetp);
459 
460 	return (DDI_SUCCESS);
461 
462 vnet_detach_fail:
463 	return (DDI_FAILURE);
464 }
465 
466 /* enable the device for transmit/receive */
467 static int
468 vnet_m_start(void *arg)
469 {
470 	vnet_t		*vnetp = arg;
471 	vp_tl_t		*vp_tlp;
472 	mac_register_t	*vp_macp;
473 	mac_callbacks_t	*cbp;
474 
475 	DBG1(vnetp, "enter\n");
476 
477 	/*
478 	 * NOTE:
479 	 * Currently, we only have generic transport. m_start() invokes
480 	 * vgen_start() which enables ports/channels in vgen and
481 	 * initiates handshake with peer vnets and vsw. In the future when we
482 	 * have support for hardware specific transports, this information
483 	 * needs to be propagted back to vnet from vgen and we need to revisit
484 	 * this code (see comments in vnet_attach()).
485 	 *
486 	 */
487 	WRITE_ENTER(&vnetp->trwlock);
488 	for (vp_tlp = vnetp->tlp; vp_tlp != NULL; vp_tlp = vp_tlp->nextp) {
489 		vp_macp = vp_tlp->macp;
490 		cbp = vp_macp->m_callbacks;
491 		cbp->mc_start(vp_macp->m_driver);
492 	}
493 	RW_EXIT(&vnetp->trwlock);
494 
495 	DBG1(vnetp, "exit\n");
496 	return (VNET_SUCCESS);
497 
498 }
499 
500 /* stop transmit/receive for the device */
501 static void
502 vnet_m_stop(void *arg)
503 {
504 	vnet_t		*vnetp = arg;
505 	vp_tl_t		*vp_tlp;
506 	mac_register_t	*vp_macp;
507 	mac_callbacks_t	*cbp;
508 
509 	DBG1(vnetp, "enter\n");
510 
511 	WRITE_ENTER(&vnetp->trwlock);
512 	for (vp_tlp = vnetp->tlp; vp_tlp != NULL; vp_tlp = vp_tlp->nextp) {
513 		vp_macp = vp_tlp->macp;
514 		cbp = vp_macp->m_callbacks;
515 		cbp->mc_stop(vp_macp->m_driver);
516 	}
517 	RW_EXIT(&vnetp->trwlock);
518 
519 	DBG1(vnetp, "exit\n");
520 }
521 
522 /* set the unicast mac address of the device */
523 static int
524 vnet_m_unicst(void *arg, const uint8_t *macaddr)
525 {
526 	_NOTE(ARGUNUSED(macaddr))
527 
528 	vnet_t *vnetp = arg;
529 
530 	DBG1(vnetp, "enter\n");
531 	/*
532 	 * NOTE: setting mac address dynamically is not supported.
533 	 */
534 	DBG1(vnetp, "exit\n");
535 
536 	return (VNET_FAILURE);
537 }
538 
539 /* enable/disable a multicast address */
540 static int
541 vnet_m_multicst(void *arg, boolean_t add, const uint8_t *mca)
542 {
543 	_NOTE(ARGUNUSED(add, mca))
544 
545 	vnet_t *vnetp = arg;
546 	vp_tl_t		*vp_tlp;
547 	mac_register_t	*vp_macp;
548 	mac_callbacks_t	*cbp;
549 	int rv = VNET_SUCCESS;
550 
551 	DBG1(vnetp, "enter\n");
552 	READ_ENTER(&vnetp->trwlock);
553 	for (vp_tlp = vnetp->tlp; vp_tlp != NULL; vp_tlp = vp_tlp->nextp) {
554 		if (strcmp(vnetp->vgen_name, vp_tlp->name) == 0) {
555 			vp_macp = vp_tlp->macp;
556 			cbp = vp_macp->m_callbacks;
557 			rv = cbp->mc_multicst(vp_macp->m_driver, add, mca);
558 			break;
559 		}
560 	}
561 	RW_EXIT(&vnetp->trwlock);
562 	DBG1(vnetp, "exit(%d)\n", rv);
563 	return (rv);
564 }
565 
566 /* set or clear promiscuous mode on the device */
567 static int
568 vnet_m_promisc(void *arg, boolean_t on)
569 {
570 	_NOTE(ARGUNUSED(on))
571 
572 	vnet_t *vnetp = arg;
573 	DBG1(vnetp, "enter\n");
574 	/*
575 	 * NOTE: setting promiscuous mode is not supported, just return success.
576 	 */
577 	DBG1(vnetp, "exit\n");
578 	return (VNET_SUCCESS);
579 }
580 
581 /*
582  * Transmit a chain of packets. This function provides switching functionality
583  * based on the destination mac address to reach other guests (within ldoms) or
584  * external hosts.
585  */
586 mblk_t *
587 vnet_m_tx(void *arg, mblk_t *mp)
588 {
589 	vnet_t			*vnetp;
590 	vnet_fdbe_t		*fp;
591 	mblk_t			*next;
592 	mblk_t 			*resid_mp;
593 	struct ether_header 	*ehp;
594 
595 	vnetp = (vnet_t *)arg;
596 	DBG1(vnetp, "enter\n");
597 	ASSERT(mp != NULL);
598 
599 	while (mp != NULL) {
600 
601 		next = mp->b_next;
602 		mp->b_next = NULL;
603 
604 		/*
605 		 * Find fdb entry for the destination
606 		 * and hold a reference to it.
607 		 */
608 		ehp = (struct ether_header *)mp->b_rptr;
609 		fp = vnet_fdbe_find(vnetp, &ehp->ether_dhost);
610 		if (fp != NULL) {
611 
612 			/*
613 			 * Destination found in FDB.
614 			 * The destination is a vnet device within ldoms
615 			 * and directly reachable, invoke the tx function
616 			 * in the fdb entry.
617 			 */
618 			resid_mp = fp->m_tx(fp->txarg, mp);
619 
620 			/* tx done; now release ref on fdb entry */
621 			VNET_FDBE_REFRELE(fp);
622 
623 			if (resid_mp != NULL) {
624 				/* m_tx failed */
625 				mp->b_next = next;
626 				break;
627 			}
628 		} else {
629 			/*
630 			 * Destination is not in FDB.
631 			 * If the destination is broadcast/multicast
632 			 * or an unknown unicast address, forward the
633 			 * packet to vsw, using the cached fdb entry
634 			 * to vsw.
635 			 */
636 			READ_ENTER(&vnetp->vsw_fp_rw);
637 
638 			fp = vnetp->vsw_fp;
639 			if (fp == NULL) {
640 				/*
641 				 * no fdb entry to vsw? drop the packet.
642 				 */
643 				RW_EXIT(&vnetp->vsw_fp_rw);
644 				freemsg(mp);
645 				mp = next;
646 				continue;
647 			}
648 
649 			/* ref hold the fdb entry to vsw */
650 			VNET_FDBE_REFHOLD(fp);
651 
652 			RW_EXIT(&vnetp->vsw_fp_rw);
653 
654 			resid_mp = fp->m_tx(fp->txarg, mp);
655 
656 			/* tx done; now release ref on fdb entry */
657 			VNET_FDBE_REFRELE(fp);
658 
659 			if (resid_mp != NULL) {
660 				/* m_tx failed */
661 				mp->b_next = next;
662 				break;
663 			}
664 		}
665 
666 		mp = next;
667 	}
668 
669 	DBG1(vnetp, "exit\n");
670 	return (mp);
671 }
672 
673 /* get statistics from the device */
674 int
675 vnet_m_stat(void *arg, uint_t stat, uint64_t *val)
676 {
677 	vnet_t *vnetp = arg;
678 	vp_tl_t	*vp_tlp;
679 	mac_register_t	*vp_macp;
680 	mac_callbacks_t	*cbp;
681 	uint64_t val_total = 0;
682 
683 	DBG1(vnetp, "enter\n");
684 
685 	/*
686 	 * get the specified statistic from each transport and return the
687 	 * aggregate val.  This obviously only works for counters.
688 	 */
689 	if ((IS_MAC_STAT(stat) && !MAC_STAT_ISACOUNTER(stat)) ||
690 	    (IS_MACTYPE_STAT(stat) && !ETHER_STAT_ISACOUNTER(stat))) {
691 		return (ENOTSUP);
692 	}
693 	READ_ENTER(&vnetp->trwlock);
694 	for (vp_tlp = vnetp->tlp; vp_tlp != NULL; vp_tlp = vp_tlp->nextp) {
695 		vp_macp = vp_tlp->macp;
696 		cbp = vp_macp->m_callbacks;
697 		if (cbp->mc_getstat(vp_macp->m_driver, stat, val) == 0)
698 			val_total += *val;
699 	}
700 	RW_EXIT(&vnetp->trwlock);
701 
702 	*val = val_total;
703 
704 	DBG1(vnetp, "exit\n");
705 	return (0);
706 }
707 
708 /* wrapper function for mac_register() */
709 static int
710 vnet_mac_register(vnet_t *vnetp)
711 {
712 	mac_register_t	*macp;
713 	int		err;
714 
715 	if ((macp = mac_alloc(MAC_VERSION)) == NULL)
716 		return (DDI_FAILURE);
717 	macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
718 	macp->m_driver = vnetp;
719 	macp->m_dip = vnetp->dip;
720 	macp->m_src_addr = vnetp->curr_macaddr;
721 	macp->m_callbacks = &vnet_m_callbacks;
722 	macp->m_min_sdu = 0;
723 	macp->m_max_sdu = vnet_ethermtu;
724 	macp->m_margin = VLAN_TAGSZ;
725 
726 	/*
727 	 * Finally, we're ready to register ourselves with the MAC layer
728 	 * interface; if this succeeds, we're all ready to start()
729 	 */
730 	err = mac_register(macp, &vnetp->mh);
731 	mac_free(macp);
732 	return (err == 0 ? DDI_SUCCESS : DDI_FAILURE);
733 }
734 
735 /* add vp_tl to the list */
736 static void
737 vnet_add_vptl(vnet_t *vnetp, vp_tl_t *vp_tlp)
738 {
739 	vp_tl_t *ttlp;
740 
741 	WRITE_ENTER(&vnetp->trwlock);
742 	if (vnetp->tlp == NULL) {
743 		vnetp->tlp = vp_tlp;
744 	} else {
745 		ttlp = vnetp->tlp;
746 		while (ttlp->nextp)
747 			ttlp = ttlp->nextp;
748 		ttlp->nextp = vp_tlp;
749 	}
750 	RW_EXIT(&vnetp->trwlock);
751 }
752 
753 /* remove vp_tl from the list */
754 static void
755 vnet_del_vptl(vnet_t *vnetp, vp_tl_t *vp_tlp)
756 {
757 	vp_tl_t *ttlp, **pretlp;
758 	boolean_t found = B_FALSE;
759 
760 	pretlp = &vnetp->tlp;
761 	ttlp = *pretlp;
762 	while (ttlp) {
763 		if (ttlp == vp_tlp) {
764 			found = B_TRUE;
765 			(*pretlp) = ttlp->nextp;
766 			ttlp->nextp = NULL;
767 			break;
768 		}
769 		pretlp = &(ttlp->nextp);
770 		ttlp = *pretlp;
771 	}
772 
773 	if (found) {
774 		KMEM_FREE(vp_tlp);
775 	}
776 }
777 
778 /* get vp_tl corresponding to the given name */
779 static vp_tl_t *
780 vnet_get_vptl(vnet_t *vnetp, const char *name)
781 {
782 	vp_tl_t *tlp;
783 
784 	tlp = vnetp->tlp;
785 	while (tlp) {
786 		if (strcmp(tlp->name, name) == 0) {
787 			return (tlp);
788 		}
789 		tlp = tlp->nextp;
790 	}
791 	DWARN(vnetp, "can't find vp_tl with name (%s)\n", name);
792 	return (NULL);
793 }
794 
795 /* read the mac address of the device */
796 static int
797 vnet_read_mac_address(vnet_t *vnetp)
798 {
799 	uchar_t 	*macaddr;
800 	uint32_t 	size;
801 	int 		rv;
802 
803 	rv = ddi_prop_lookup_byte_array(DDI_DEV_T_ANY, vnetp->dip,
804 	    DDI_PROP_DONTPASS, macaddr_propname, &macaddr, &size);
805 	if ((rv != DDI_PROP_SUCCESS) || (size != ETHERADDRL)) {
806 		DWARN(vnetp, "prop_lookup failed(%s) err(%d)\n",
807 		    macaddr_propname, rv);
808 		return (DDI_FAILURE);
809 	}
810 	bcopy(macaddr, (caddr_t)vnetp->vendor_addr, ETHERADDRL);
811 	bcopy(macaddr, (caddr_t)vnetp->curr_macaddr, ETHERADDRL);
812 	ddi_prop_free(macaddr);
813 
814 	return (DDI_SUCCESS);
815 }
816 
817 static void
818 vnet_fdb_create(vnet_t *vnetp)
819 {
820 	char		hashname[MAXNAMELEN];
821 
822 	(void) snprintf(hashname, MAXNAMELEN, "vnet%d-fdbhash",
823 	    vnetp->instance);
824 	vnetp->fdb_nchains = vnet_fdb_nchains;
825 	vnetp->fdb_hashp = mod_hash_create_ptrhash(hashname, vnetp->fdb_nchains,
826 	    mod_hash_null_valdtor, sizeof (void *));
827 }
828 
829 static void
830 vnet_fdb_destroy(vnet_t *vnetp)
831 {
832 	/* destroy fdb-hash-table */
833 	if (vnetp->fdb_hashp != NULL) {
834 		mod_hash_destroy_hash(vnetp->fdb_hashp);
835 		vnetp->fdb_hashp = NULL;
836 		vnetp->fdb_nchains = 0;
837 	}
838 }
839 
840 /*
841  * Add an entry into the fdb.
842  */
843 void
844 vnet_fdbe_add(vnet_t *vnetp, struct ether_addr *macaddr, uint8_t type,
845 	mac_tx_t m_tx, void *port)
846 {
847 	uint64_t	addr = 0;
848 	vnet_fdbe_t	*fp;
849 	int		rv;
850 
851 	KEY_HASH(addr, macaddr);
852 
853 	fp = kmem_zalloc(sizeof (vnet_fdbe_t), KM_SLEEP);
854 	fp->txarg = port;
855 	fp->type = type;
856 	fp->m_tx = m_tx;
857 
858 	/*
859 	 * If the entry being added corresponds to vsw-port, we cache that
860 	 * entry and keep a permanent reference to it. This is done to avoid
861 	 * searching this entry when we need to transmit a frame with an
862 	 * unknown unicast destination, in vnet_m_tx().
863 	 */
864 	(fp->type == VNET_VSWPORT) ? (fp->refcnt = 1) : (fp->refcnt = 0);
865 
866 	/*
867 	 * Note: duplicate keys will be rejected by mod_hash.
868 	 */
869 	rv = mod_hash_insert(vnetp->fdb_hashp, (mod_hash_key_t)addr,
870 	    (mod_hash_val_t)fp);
871 	if (rv != 0) {
872 		DWARN(vnetp, "Duplicate macaddr key(%lx)\n", addr);
873 		KMEM_FREE(fp);
874 		return;
875 	}
876 
877 	if (type == VNET_VSWPORT) {
878 		/* Cache the fdb entry to vsw-port */
879 		WRITE_ENTER(&vnetp->vsw_fp_rw);
880 		if (vnetp->vsw_fp == NULL)
881 			vnetp->vsw_fp = fp;
882 		RW_EXIT(&vnetp->vsw_fp_rw);
883 	}
884 }
885 
886 /*
887  * Remove an entry from fdb.
888  */
889 void
890 vnet_fdbe_del(vnet_t *vnetp, struct ether_addr *eaddr)
891 {
892 	uint64_t	addr = 0;
893 	vnet_fdbe_t	*fp;
894 	int		rv;
895 	uint32_t	refcnt;
896 
897 	KEY_HASH(addr, eaddr);
898 
899 	/*
900 	 * Remove the entry from fdb hash table.
901 	 * This prevents further references to this fdb entry.
902 	 */
903 	rv = mod_hash_remove(vnetp->fdb_hashp, (mod_hash_key_t)addr,
904 	    (mod_hash_val_t *)&fp);
905 	ASSERT(rv == 0);
906 
907 	if (fp->type == VNET_VSWPORT) {
908 		WRITE_ENTER(&vnetp->vsw_fp_rw);
909 
910 		ASSERT(fp == vnetp->vsw_fp);
911 		vnetp->vsw_fp = NULL;
912 
913 		RW_EXIT(&vnetp->vsw_fp_rw);
914 	}
915 
916 	/*
917 	 * If there are threads already ref holding before the entry was
918 	 * removed from hash table, then wait for ref count to drop to zero.
919 	 */
920 	(fp->type == VNET_VSWPORT) ? (refcnt = 1) : (refcnt = 0);
921 	while (fp->refcnt > refcnt) {
922 		delay(drv_usectohz(vnet_fdbe_refcnt_delay));
923 	}
924 
925 	kmem_free(fp, sizeof (*fp));
926 }
927 
928 /*
929  * Modify the fdb entry for the given macaddr,
930  * to use the specified port for transmits.
931  */
932 void
933 vnet_fdbe_modify(vnet_t *vnetp, struct ether_addr *macaddr, void *portp,
934 	boolean_t flag)
935 {
936 	vnet_fdbe_t	*fp;
937 	uint64_t	addr = 0;
938 	int		rv;
939 	uint32_t	refcnt;
940 
941 	KEY_HASH(addr, macaddr);
942 
943 	/*
944 	 * Remove the entry from fdb hash table.
945 	 * This prevents further references to this fdb entry.
946 	 */
947 	rv = mod_hash_remove(vnetp->fdb_hashp, (mod_hash_key_t)addr,
948 	    (mod_hash_val_t *)&fp);
949 	ASSERT(rv == 0);
950 
951 	/* fdb entry of vsw port must never be modified */
952 	ASSERT(fp->type == VNET_VNETPORT);
953 
954 	/*
955 	 * If there are threads already ref holding before the entry was
956 	 * removed from hash table, then wait for reference count to drop to
957 	 * zero. Note: flag indicates the context of caller. If we are in the
958 	 * context of transmit routine, there is a reference held by the caller
959 	 * too, in which case, wait for the refcnt to drop to 1.
960 	 */
961 	(flag == B_TRUE) ? (refcnt = 1) : (refcnt = 0);
962 	while (fp->refcnt > refcnt) {
963 		delay(drv_usectohz(vnet_fdbe_refcnt_delay));
964 	}
965 
966 	/* update the portp in fdb entry with the new value */
967 	fp->txarg = portp;
968 
969 	/* Reinsert the updated fdb entry into the table */
970 	rv = mod_hash_insert(vnetp->fdb_hashp, (mod_hash_key_t)addr,
971 	    (mod_hash_val_t)fp);
972 	ASSERT(rv == 0);
973 }
974 
975 /*
976  * Search fdb for a given mac address. If an entry is found, hold
977  * a reference to it and return the entry; else returns NULL.
978  */
979 static vnet_fdbe_t *
980 vnet_fdbe_find(vnet_t *vnetp, struct ether_addr *addrp)
981 {
982 	uint64_t	key = 0;
983 	vnet_fdbe_t	*fp;
984 	int		rv;
985 
986 	KEY_HASH(key, addrp);
987 
988 	rv = mod_hash_find_cb(vnetp->fdb_hashp, (mod_hash_key_t)key,
989 	    (mod_hash_val_t *)&fp, vnet_fdbe_find_cb);
990 
991 	if (rv != 0)
992 		return (NULL);
993 
994 	return (fp);
995 }
996 
997 /*
998  * Callback function provided to mod_hash_find_cb(). After finding the fdb
999  * entry corresponding to the key (macaddr), this callback will be invoked by
1000  * mod_hash_find_cb() to atomically increment the reference count on the fdb
1001  * entry before returning the found entry.
1002  */
1003 static void
1004 vnet_fdbe_find_cb(mod_hash_key_t key, mod_hash_val_t val)
1005 {
1006 	_NOTE(ARGUNUSED(key))
1007 	VNET_FDBE_REFHOLD((vnet_fdbe_t *)val);
1008 }
1009 
1010 void
1011 vnet_rx(void *arg, mac_resource_handle_t mrh, mblk_t *mp)
1012 {
1013 	vnet_t *vnetp = arg;
1014 	mac_rx(vnetp->mh, mrh, mp);
1015 }
1016 
1017 void
1018 vnet_tx_update(void *arg)
1019 {
1020 	vnet_t *vnetp = arg;
1021 	mac_tx_update(vnetp->mh);
1022 }
1023