xref: /titanic_51/usr/src/uts/sun4v/io/vnet.c (revision b6917abefc343244b784f0cc34bc65b01469c3bf)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <sys/types.h>
30 #include <sys/errno.h>
31 #include <sys/param.h>
32 #include <sys/stream.h>
33 #include <sys/kmem.h>
34 #include <sys/conf.h>
35 #include <sys/devops.h>
36 #include <sys/ksynch.h>
37 #include <sys/stat.h>
38 #include <sys/modctl.h>
39 #include <sys/modhash.h>
40 #include <sys/debug.h>
41 #include <sys/ethernet.h>
42 #include <sys/dlpi.h>
43 #include <net/if.h>
44 #include <sys/mac.h>
45 #include <sys/mac_ether.h>
46 #include <sys/ddi.h>
47 #include <sys/sunddi.h>
48 #include <sys/strsun.h>
49 #include <sys/note.h>
50 #include <sys/atomic.h>
51 #include <sys/vnet.h>
52 #include <sys/vlan.h>
53 #include <sys/vnet_mailbox.h>
54 #include <sys/vnet_common.h>
55 #include <sys/dds.h>
56 #include <sys/strsubr.h>
57 #include <sys/taskq.h>
58 
59 /*
60  * Function prototypes.
61  */
62 
63 /* DDI entrypoints */
64 static int vnetdevinfo(dev_info_t *, ddi_info_cmd_t, void *, void **);
65 static int vnetattach(dev_info_t *, ddi_attach_cmd_t);
66 static int vnetdetach(dev_info_t *, ddi_detach_cmd_t);
67 
68 /* MAC entrypoints  */
69 static int vnet_m_stat(void *, uint_t, uint64_t *);
70 static int vnet_m_start(void *);
71 static void vnet_m_stop(void *);
72 static int vnet_m_promisc(void *, boolean_t);
73 static int vnet_m_multicst(void *, boolean_t, const uint8_t *);
74 static int vnet_m_unicst(void *, const uint8_t *);
75 mblk_t *vnet_m_tx(void *, mblk_t *);
76 
77 /* vnet internal functions */
78 static int vnet_mac_register(vnet_t *);
79 static int vnet_read_mac_address(vnet_t *vnetp);
80 
81 /* Forwarding database (FDB) routines */
82 static void vnet_fdb_create(vnet_t *vnetp);
83 static void vnet_fdb_destroy(vnet_t *vnetp);
84 static vnet_res_t *vnet_fdbe_find(vnet_t *vnetp, struct ether_addr *addrp);
85 static void vnet_fdbe_find_cb(mod_hash_key_t key, mod_hash_val_t val);
86 void vnet_fdbe_add(vnet_t *vnetp, vnet_res_t *vresp);
87 static void vnet_fdbe_del(vnet_t *vnetp, vnet_res_t *vresp);
88 
89 static void vnet_rx(vio_net_handle_t vrh, mblk_t *mp);
90 static void vnet_tx_update(vio_net_handle_t vrh);
91 static void vnet_res_start_task(void *arg);
92 static void vnet_start_resources(vnet_t *vnetp);
93 static void vnet_stop_resources(vnet_t *vnetp);
94 static void vnet_dispatch_res_task(vnet_t *vnetp);
95 static void vnet_res_start_task(void *arg);
96 static void vnet_handle_res_err(vio_net_handle_t vrh, vio_net_err_val_t err);
97 
98 
99 /* Exported to vnet_dds */
100 int vnet_send_dds_msg(vnet_t *vnetp, void *dmsg);
101 
102 /* Externs that are imported from vnet_gen */
103 extern int vgen_init(void *vnetp, uint64_t regprop, dev_info_t *vnetdip,
104     const uint8_t *macaddr, void **vgenhdl);
105 extern int vgen_uninit(void *arg);
106 extern int vgen_dds_tx(void *arg, void *dmsg);
107 
108 /* Externs that are imported from vnet_dds */
109 extern void vdds_mod_init(void);
110 extern void vdds_mod_fini(void);
111 extern int vdds_init(vnet_t *vnetp);
112 extern void vdds_cleanup(vnet_t *vnetp);
113 extern void vdds_process_dds_msg(vnet_t *vnetp, vio_dds_msg_t *dmsg);
114 extern void vdds_cleanup_hybrid_res(vnet_t *vnetp);
115 
/*
 * Take a reference on an fdb entry (a vnet_res_t). The count is bumped
 * atomically; the ASSERT catches a wrap back to zero on DEBUG kernels.
 *
 * Wrapped in do { } while (0) so that the macro behaves as a single
 * statement, e.g. when used as the body of an if/else.
 */
#define	VNET_FDBE_REFHOLD(p)						\
do {									\
	atomic_inc_32(&(p)->refcnt);					\
	ASSERT((p)->refcnt != 0);					\
} while (0)

/*
 * Drop a reference on an fdb entry. The ASSERT catches a release with no
 * reference held on DEBUG kernels.
 */
#define	VNET_FDBE_REFRELE(p)						\
do {									\
	ASSERT((p)->refcnt != 0);					\
	atomic_dec_32(&(p)->refcnt);					\
} while (0)
127 
/*
 * MAC callback vector for a vnet instance; vnet_mac_register() hands it to
 * the MAC layer through m_callbacks. The initializer is positional, so the
 * order of the entries must not be changed.
 */
static mac_callbacks_t vnet_m_callbacks = {
	0,			/* first slot left as zero */
	vnet_m_stat,		/* get statistics */
	vnet_m_start,		/* enable transmit/receive */
	vnet_m_stop,		/* stop transmit/receive */
	vnet_m_promisc,		/* set/clear promiscuous mode */
	vnet_m_multicst,	/* enable/disable a multicast address */
	vnet_m_unicst,		/* set the unicast mac address */
	vnet_m_tx,		/* transmit a chain of packets */
	NULL,			/* remaining optional slots are not provided */
	NULL,
	NULL
};
141 
142 /*
143  * Linked list of "vnet_t" structures - one per instance.
144  */
145 static vnet_t	*vnet_headp = NULL;
146 static krwlock_t vnet_rw;
147 
148 /* Tunables */
149 uint32_t vnet_ntxds = VNET_NTXDS;	/* power of 2 transmit descriptors */
150 uint32_t vnet_ldcwd_interval = VNET_LDCWD_INTERVAL; /* watchdog freq in msec */
151 uint32_t vnet_ldcwd_txtimeout = VNET_LDCWD_TXTIMEOUT;  /* tx timeout in msec */
152 uint32_t vnet_ldc_mtu = VNET_LDC_MTU;		/* ldc mtu */
153 
154 /* # of chains in fdb hash table */
155 uint32_t	vnet_fdb_nchains = VNET_NFDB_HASH;
156 
157 /* Internal tunables */
158 uint32_t	vnet_ethermtu = 1500;	/* mtu of the device */
159 
160 /*
161  * Default vlan id. This is only used internally when the "default-vlan-id"
162  * property is not present in the MD device node. Therefore, this should not be
163  * used as a tunable; if this value is changed, the corresponding variable
164  * should be updated to the same value in vsw and also other vnets connected to
165  * the same vsw.
166  */
167 uint16_t	vnet_default_vlan_id = 1;
168 
169 /* delay in usec to wait for all references on a fdb entry to be dropped */
170 uint32_t vnet_fdbe_refcnt_delay = 10;
171 
172 static struct ether_addr etherbroadcastaddr = {
173 	0xff, 0xff, 0xff, 0xff, 0xff, 0xff
174 };
175 
176 
177 /*
178  * Property names
179  */
180 static char macaddr_propname[] = "local-mac-address";
181 
182 /*
183  * This is the string displayed by modinfo(1m).
184  */
185 static char vnet_ident[] = "vnet driver v%I%";
186 extern struct mod_ops mod_driverops;
187 static struct cb_ops cb_vnetops = {
188 	nulldev,		/* cb_open */
189 	nulldev,		/* cb_close */
190 	nodev,			/* cb_strategy */
191 	nodev,			/* cb_print */
192 	nodev,			/* cb_dump */
193 	nodev,			/* cb_read */
194 	nodev,			/* cb_write */
195 	nodev,			/* cb_ioctl */
196 	nodev,			/* cb_devmap */
197 	nodev,			/* cb_mmap */
198 	nodev,			/* cb_segmap */
199 	nochpoll,		/* cb_chpoll */
200 	ddi_prop_op,		/* cb_prop_op */
201 	NULL,			/* cb_stream */
202 	(int)(D_MP)		/* cb_flag */
203 };
204 
205 static struct dev_ops vnetops = {
206 	DEVO_REV,		/* devo_rev */
207 	0,			/* devo_refcnt */
208 	NULL,			/* devo_getinfo */
209 	nulldev,		/* devo_identify */
210 	nulldev,		/* devo_probe */
211 	vnetattach,		/* devo_attach */
212 	vnetdetach,		/* devo_detach */
213 	nodev,			/* devo_reset */
214 	&cb_vnetops,		/* devo_cb_ops */
215 	(struct bus_ops *)NULL	/* devo_bus_ops */
216 };
217 
218 static struct modldrv modldrv = {
219 	&mod_driverops,		/* Type of module.  This one is a driver */
220 	vnet_ident,		/* ID string */
221 	&vnetops		/* driver specific ops */
222 };
223 
224 static struct modlinkage modlinkage = {
225 	MODREV_1, (void *)&modldrv, NULL
226 };
227 
228 #ifdef DEBUG
229 
230 /*
231  * Print debug messages - set to 0xf to enable all msgs
232  */
233 int vnet_dbglevel = 0x8;
234 
235 static void
236 debug_printf(const char *fname, void *arg, const char *fmt, ...)
237 {
238 	char    buf[512];
239 	va_list ap;
240 	vnet_t *vnetp = (vnet_t *)arg;
241 	char    *bufp = buf;
242 
243 	if (vnetp == NULL) {
244 		(void) sprintf(bufp, "%s: ", fname);
245 		bufp += strlen(bufp);
246 	} else {
247 		(void) sprintf(bufp, "vnet%d:%s: ", vnetp->instance, fname);
248 		bufp += strlen(bufp);
249 	}
250 	va_start(ap, fmt);
251 	(void) vsprintf(bufp, fmt, ap);
252 	va_end(ap);
253 	cmn_err(CE_CONT, "%s\n", buf);
254 }
255 
256 #endif
257 
258 /* _init(9E): initialize the loadable module */
259 int
260 _init(void)
261 {
262 	int status;
263 
264 	DBG1(NULL, "enter\n");
265 
266 	mac_init_ops(&vnetops, "vnet");
267 	status = mod_install(&modlinkage);
268 	if (status != 0) {
269 		mac_fini_ops(&vnetops);
270 	}
271 	vdds_mod_init();
272 	DBG1(NULL, "exit(%d)\n", status);
273 	return (status);
274 }
275 
276 /* _fini(9E): prepare the module for unloading. */
277 int
278 _fini(void)
279 {
280 	int status;
281 
282 	DBG1(NULL, "enter\n");
283 
284 	status = mod_remove(&modlinkage);
285 	if (status != 0)
286 		return (status);
287 	mac_fini_ops(&vnetops);
288 	vdds_mod_fini();
289 
290 	DBG1(NULL, "exit(%d)\n", status);
291 	return (status);
292 }
293 
294 /* _info(9E): return information about the loadable module */
295 int
296 _info(struct modinfo *modinfop)
297 {
298 	return (mod_info(&modlinkage, modinfop));
299 }
300 
301 /*
302  * attach(9E): attach a device to the system.
303  * called once for each instance of the device on the system.
304  */
305 static int
306 vnetattach(dev_info_t *dip, ddi_attach_cmd_t cmd)
307 {
308 	vnet_t		*vnetp;
309 	int		status;
310 	int		instance;
311 	uint64_t	reg;
312 	char		qname[TASKQ_NAMELEN];
313 	enum	{ AST_init = 0x0, AST_vnet_alloc = 0x1,
314 		AST_mac_alloc = 0x2, AST_read_macaddr = 0x4,
315 		AST_vgen_init = 0x8, AST_fdbh_alloc = 0x10,
316 		AST_vdds_init = 0x20, AST_taskq_create = 0x40,
317 		AST_vnet_list = 0x80 } attach_state;
318 
319 	attach_state = AST_init;
320 
321 	switch (cmd) {
322 	case DDI_ATTACH:
323 		break;
324 	case DDI_RESUME:
325 	case DDI_PM_RESUME:
326 	default:
327 		goto vnet_attach_fail;
328 	}
329 
330 	instance = ddi_get_instance(dip);
331 	DBG1(NULL, "instance(%d) enter\n", instance);
332 
333 	/* allocate vnet_t and mac_t structures */
334 	vnetp = kmem_zalloc(sizeof (vnet_t), KM_SLEEP);
335 	vnetp->dip = dip;
336 	vnetp->instance = instance;
337 	rw_init(&vnetp->vrwlock, NULL, RW_DRIVER, NULL);
338 	rw_init(&vnetp->vsw_fp_rw, NULL, RW_DRIVER, NULL);
339 	attach_state |= AST_vnet_alloc;
340 
341 	status = vdds_init(vnetp);
342 	if (status != 0) {
343 		goto vnet_attach_fail;
344 	}
345 	attach_state |= AST_vdds_init;
346 
347 	/* setup links to vnet_t from both devinfo and mac_t */
348 	ddi_set_driver_private(dip, (caddr_t)vnetp);
349 
350 	/* read the mac address */
351 	status = vnet_read_mac_address(vnetp);
352 	if (status != DDI_SUCCESS) {
353 		goto vnet_attach_fail;
354 	}
355 	attach_state |= AST_read_macaddr;
356 
357 	reg = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
358 	    DDI_PROP_DONTPASS, "reg", -1);
359 	if (reg == -1) {
360 		goto vnet_attach_fail;
361 	}
362 	vnetp->reg = reg;
363 
364 	vnet_fdb_create(vnetp);
365 	attach_state |= AST_fdbh_alloc;
366 
367 	(void) snprintf(qname, TASKQ_NAMELEN, "vnet_taskq%d", instance);
368 	if ((vnetp->taskqp = ddi_taskq_create(dip, qname, 1,
369 	    TASKQ_DEFAULTPRI, 0)) == NULL) {
370 		cmn_err(CE_WARN, "!vnet%d: Unable to create task queue",
371 		    instance);
372 		goto vnet_attach_fail;
373 	}
374 	attach_state |= AST_taskq_create;
375 
376 	/* add to the list of vnet devices */
377 	WRITE_ENTER(&vnet_rw);
378 	vnetp->nextp = vnet_headp;
379 	vnet_headp = vnetp;
380 	RW_EXIT(&vnet_rw);
381 
382 	attach_state |= AST_vnet_list;
383 
384 	/*
385 	 * Initialize the generic vnet plugin which provides
386 	 * communication via sun4v LDC (logical domain channel) based
387 	 * resources. It will register the LDC resources as and when
388 	 * they become available.
389 	 */
390 	status = vgen_init(vnetp, reg, vnetp->dip,
391 	    (uint8_t *)vnetp->curr_macaddr, &vnetp->vgenhdl);
392 	if (status != DDI_SUCCESS) {
393 		DERR(vnetp, "vgen_init() failed\n");
394 		goto vnet_attach_fail;
395 	}
396 	attach_state |= AST_vgen_init;
397 
398 	/* register with MAC layer */
399 	status = vnet_mac_register(vnetp);
400 	if (status != DDI_SUCCESS) {
401 		goto vnet_attach_fail;
402 	}
403 
404 	DBG1(NULL, "instance(%d) exit\n", instance);
405 	return (DDI_SUCCESS);
406 
407 vnet_attach_fail:
408 
409 	if (attach_state & AST_vnet_list) {
410 		vnet_t		**vnetpp;
411 		/* unlink from instance(vnet_t) list */
412 		WRITE_ENTER(&vnet_rw);
413 		for (vnetpp = &vnet_headp; *vnetpp;
414 		    vnetpp = &(*vnetpp)->nextp) {
415 			if (*vnetpp == vnetp) {
416 				*vnetpp = vnetp->nextp;
417 				break;
418 			}
419 		}
420 		RW_EXIT(&vnet_rw);
421 	}
422 
423 	if (attach_state & AST_vdds_init) {
424 		vdds_cleanup(vnetp);
425 	}
426 	if (attach_state & AST_taskq_create) {
427 		ddi_taskq_destroy(vnetp->taskqp);
428 	}
429 	if (attach_state & AST_fdbh_alloc) {
430 		vnet_fdb_destroy(vnetp);
431 	}
432 	if (attach_state & AST_vgen_init) {
433 		(void) vgen_uninit(vnetp->vgenhdl);
434 	}
435 	if (attach_state & AST_vnet_alloc) {
436 		rw_destroy(&vnetp->vrwlock);
437 		rw_destroy(&vnetp->vsw_fp_rw);
438 		KMEM_FREE(vnetp);
439 	}
440 	return (DDI_FAILURE);
441 }
442 
443 /*
444  * detach(9E): detach a device from the system.
445  */
446 static int
447 vnetdetach(dev_info_t *dip, ddi_detach_cmd_t cmd)
448 {
449 	vnet_t		*vnetp;
450 	vnet_t		**vnetpp;
451 	int		instance;
452 	int		rv;
453 
454 	instance = ddi_get_instance(dip);
455 	DBG1(NULL, "instance(%d) enter\n", instance);
456 
457 	vnetp = ddi_get_driver_private(dip);
458 	if (vnetp == NULL) {
459 		goto vnet_detach_fail;
460 	}
461 
462 	switch (cmd) {
463 	case DDI_DETACH:
464 		break;
465 	case DDI_SUSPEND:
466 	case DDI_PM_SUSPEND:
467 	default:
468 		goto vnet_detach_fail;
469 	}
470 
471 	(void) vdds_cleanup(vnetp);
472 	rv = vgen_uninit(vnetp->vgenhdl);
473 	if (rv != DDI_SUCCESS) {
474 		goto vnet_detach_fail;
475 	}
476 
477 	/*
478 	 * Unregister from the MAC subsystem.  This can fail, in
479 	 * particular if there are DLPI style-2 streams still open -
480 	 * in which case we just return failure.
481 	 */
482 	if (mac_unregister(vnetp->mh) != 0)
483 		goto vnet_detach_fail;
484 
485 	/* unlink from instance(vnet_t) list */
486 	WRITE_ENTER(&vnet_rw);
487 	for (vnetpp = &vnet_headp; *vnetpp; vnetpp = &(*vnetpp)->nextp) {
488 		if (*vnetpp == vnetp) {
489 			*vnetpp = vnetp->nextp;
490 			break;
491 		}
492 	}
493 	RW_EXIT(&vnet_rw);
494 
495 	ddi_taskq_destroy(vnetp->taskqp);
496 	/* destroy fdb */
497 	vnet_fdb_destroy(vnetp);
498 
499 	rw_destroy(&vnetp->vrwlock);
500 	rw_destroy(&vnetp->vsw_fp_rw);
501 	KMEM_FREE(vnetp);
502 
503 	return (DDI_SUCCESS);
504 
505 vnet_detach_fail:
506 	return (DDI_FAILURE);
507 }
508 
509 /* enable the device for transmit/receive */
510 static int
511 vnet_m_start(void *arg)
512 {
513 	vnet_t		*vnetp = arg;
514 
515 	DBG1(vnetp, "enter\n");
516 
517 	WRITE_ENTER(&vnetp->vrwlock);
518 	vnetp->flags |= VNET_STARTED;
519 	vnet_start_resources(vnetp);
520 	RW_EXIT(&vnetp->vrwlock);
521 
522 	DBG1(vnetp, "exit\n");
523 	return (VNET_SUCCESS);
524 
525 }
526 
527 /* stop transmit/receive for the device */
528 static void
529 vnet_m_stop(void *arg)
530 {
531 	vnet_t		*vnetp = arg;
532 
533 	DBG1(vnetp, "enter\n");
534 
535 	WRITE_ENTER(&vnetp->vrwlock);
536 	if (vnetp->flags & VNET_STARTED) {
537 		vnet_stop_resources(vnetp);
538 		vnetp->flags &= ~VNET_STARTED;
539 	}
540 	RW_EXIT(&vnetp->vrwlock);
541 
542 	DBG1(vnetp, "exit\n");
543 }
544 
545 /* set the unicast mac address of the device */
546 static int
547 vnet_m_unicst(void *arg, const uint8_t *macaddr)
548 {
549 	_NOTE(ARGUNUSED(macaddr))
550 
551 	vnet_t *vnetp = arg;
552 
553 	DBG1(vnetp, "enter\n");
554 	/*
555 	 * NOTE: setting mac address dynamically is not supported.
556 	 */
557 	DBG1(vnetp, "exit\n");
558 
559 	return (VNET_FAILURE);
560 }
561 
562 /* enable/disable a multicast address */
563 static int
564 vnet_m_multicst(void *arg, boolean_t add, const uint8_t *mca)
565 {
566 	_NOTE(ARGUNUSED(add, mca))
567 
568 	vnet_t *vnetp = arg;
569 	vnet_res_t	*vresp;
570 	mac_register_t	*macp;
571 	mac_callbacks_t	*cbp;
572 	int rv = VNET_SUCCESS;
573 
574 	DBG1(vnetp, "enter\n");
575 
576 	READ_ENTER(&vnetp->vrwlock);
577 	for (vresp = vnetp->vres_list; vresp != NULL; vresp = vresp->nextp) {
578 		if (vresp->type == VIO_NET_RES_LDC_SERVICE) {
579 			macp = &vresp->macreg;
580 			cbp = macp->m_callbacks;
581 			rv = cbp->mc_multicst(macp->m_driver, add, mca);
582 		}
583 	}
584 	RW_EXIT(&vnetp->vrwlock);
585 
586 	DBG1(vnetp, "exit(%d)\n", rv);
587 	return (rv);
588 }
589 
590 /* set or clear promiscuous mode on the device */
591 static int
592 vnet_m_promisc(void *arg, boolean_t on)
593 {
594 	_NOTE(ARGUNUSED(on))
595 
596 	vnet_t *vnetp = arg;
597 	DBG1(vnetp, "enter\n");
598 	/*
599 	 * NOTE: setting promiscuous mode is not supported, just return success.
600 	 */
601 	DBG1(vnetp, "exit\n");
602 	return (VNET_SUCCESS);
603 }
604 
605 /*
606  * Transmit a chain of packets. This function provides switching functionality
607  * based on the destination mac address to reach other guests (within ldoms) or
608  * external hosts.
609  */
610 mblk_t *
611 vnet_m_tx(void *arg, mblk_t *mp)
612 {
613 	vnet_t			*vnetp;
614 	vnet_res_t		*vresp;
615 	mblk_t			*next;
616 	mblk_t			*resid_mp;
617 	mac_register_t		*macp;
618 	struct ether_header	*ehp;
619 	boolean_t		is_unicast;
620 
621 	vnetp = (vnet_t *)arg;
622 	DBG1(vnetp, "enter\n");
623 	ASSERT(mp != NULL);
624 
625 	while (mp != NULL) {
626 
627 		next = mp->b_next;
628 		mp->b_next = NULL;
629 
630 		/*
631 		 * Find fdb entry for the destination
632 		 * and hold a reference to it.
633 		 */
634 		ehp = (struct ether_header *)mp->b_rptr;
635 		vresp = vnet_fdbe_find(vnetp, &ehp->ether_dhost);
636 		if (vresp != NULL) {
637 
638 			/*
639 			 * Destination found in FDB.
640 			 * The destination is a vnet device within ldoms
641 			 * and directly reachable, invoke the tx function
642 			 * in the fdb entry.
643 			 */
644 			macp = &vresp->macreg;
645 			resid_mp = macp->m_callbacks->mc_tx(macp->m_driver, mp);
646 
647 			/* tx done; now release ref on fdb entry */
648 			VNET_FDBE_REFRELE(vresp);
649 
650 			if (resid_mp != NULL) {
651 				/* m_tx failed */
652 				mp->b_next = next;
653 				break;
654 			}
655 		} else {
656 			is_unicast = !(IS_BROADCAST(ehp) ||
657 			    (IS_MULTICAST(ehp)));
658 			/*
659 			 * Destination is not in FDB.
660 			 * If the destination is broadcast or multicast,
661 			 * then forward the packet to vswitch.
662 			 * If a Hybrid resource avilable, then send the
663 			 * unicast packet via hybrid resource, otherwise
664 			 * forward it to vswitch.
665 			 */
666 			READ_ENTER(&vnetp->vsw_fp_rw);
667 
668 			if ((is_unicast) && (vnetp->hio_fp != NULL)) {
669 				vresp = vnetp->hio_fp;
670 			} else {
671 				vresp = vnetp->vsw_fp;
672 			}
673 			if (vresp == NULL) {
674 				/*
675 				 * no fdb entry to vsw? drop the packet.
676 				 */
677 				RW_EXIT(&vnetp->vsw_fp_rw);
678 				freemsg(mp);
679 				mp = next;
680 				continue;
681 			}
682 
683 			/* ref hold the fdb entry to vsw */
684 			VNET_FDBE_REFHOLD(vresp);
685 
686 			RW_EXIT(&vnetp->vsw_fp_rw);
687 
688 			macp = &vresp->macreg;
689 			resid_mp = macp->m_callbacks->mc_tx(macp->m_driver, mp);
690 
691 			/* tx done; now release ref on fdb entry */
692 			VNET_FDBE_REFRELE(vresp);
693 
694 			if (resid_mp != NULL) {
695 				/* m_tx failed */
696 				mp->b_next = next;
697 				break;
698 			}
699 		}
700 
701 		mp = next;
702 	}
703 
704 	DBG1(vnetp, "exit\n");
705 	return (mp);
706 }
707 
708 /* get statistics from the device */
709 int
710 vnet_m_stat(void *arg, uint_t stat, uint64_t *val)
711 {
712 	vnet_t *vnetp = arg;
713 	vnet_res_t	*vresp;
714 	mac_register_t	*macp;
715 	mac_callbacks_t	*cbp;
716 	uint64_t val_total = 0;
717 
718 	DBG1(vnetp, "enter\n");
719 
720 	/*
721 	 * get the specified statistic from each transport and return the
722 	 * aggregate val.  This obviously only works for counters.
723 	 */
724 	if ((IS_MAC_STAT(stat) && !MAC_STAT_ISACOUNTER(stat)) ||
725 	    (IS_MACTYPE_STAT(stat) && !ETHER_STAT_ISACOUNTER(stat))) {
726 		return (ENOTSUP);
727 	}
728 
729 	READ_ENTER(&vnetp->vrwlock);
730 	for (vresp = vnetp->vres_list; vresp != NULL; vresp = vresp->nextp) {
731 		macp = &vresp->macreg;
732 		cbp = macp->m_callbacks;
733 		if (cbp->mc_getstat(macp->m_driver, stat, val) == 0)
734 			val_total += *val;
735 	}
736 	RW_EXIT(&vnetp->vrwlock);
737 
738 	*val = val_total;
739 
740 	DBG1(vnetp, "exit\n");
741 	return (0);
742 }
743 
744 /* wrapper function for mac_register() */
745 static int
746 vnet_mac_register(vnet_t *vnetp)
747 {
748 	mac_register_t	*macp;
749 	int		err;
750 
751 	if ((macp = mac_alloc(MAC_VERSION)) == NULL)
752 		return (DDI_FAILURE);
753 	macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
754 	macp->m_driver = vnetp;
755 	macp->m_dip = vnetp->dip;
756 	macp->m_src_addr = vnetp->curr_macaddr;
757 	macp->m_callbacks = &vnet_m_callbacks;
758 	macp->m_min_sdu = 0;
759 	macp->m_max_sdu = vnet_ethermtu;
760 	macp->m_margin = VLAN_TAGSZ;
761 
762 	/*
763 	 * Finally, we're ready to register ourselves with the MAC layer
764 	 * interface; if this succeeds, we're all ready to start()
765 	 */
766 	err = mac_register(macp, &vnetp->mh);
767 	mac_free(macp);
768 	return (err == 0 ? DDI_SUCCESS : DDI_FAILURE);
769 }
770 
771 /* read the mac address of the device */
772 static int
773 vnet_read_mac_address(vnet_t *vnetp)
774 {
775 	uchar_t 	*macaddr;
776 	uint32_t 	size;
777 	int 		rv;
778 
779 	rv = ddi_prop_lookup_byte_array(DDI_DEV_T_ANY, vnetp->dip,
780 	    DDI_PROP_DONTPASS, macaddr_propname, &macaddr, &size);
781 	if ((rv != DDI_PROP_SUCCESS) || (size != ETHERADDRL)) {
782 		DWARN(vnetp, "prop_lookup failed(%s) err(%d)\n",
783 		    macaddr_propname, rv);
784 		return (DDI_FAILURE);
785 	}
786 	bcopy(macaddr, (caddr_t)vnetp->vendor_addr, ETHERADDRL);
787 	bcopy(macaddr, (caddr_t)vnetp->curr_macaddr, ETHERADDRL);
788 	ddi_prop_free(macaddr);
789 
790 	return (DDI_SUCCESS);
791 }
792 
793 static void
794 vnet_fdb_create(vnet_t *vnetp)
795 {
796 	char		hashname[MAXNAMELEN];
797 
798 	(void) snprintf(hashname, MAXNAMELEN, "vnet%d-fdbhash",
799 	    vnetp->instance);
800 	vnetp->fdb_nchains = vnet_fdb_nchains;
801 	vnetp->fdb_hashp = mod_hash_create_ptrhash(hashname, vnetp->fdb_nchains,
802 	    mod_hash_null_valdtor, sizeof (void *));
803 }
804 
805 static void
806 vnet_fdb_destroy(vnet_t *vnetp)
807 {
808 	/* destroy fdb-hash-table */
809 	if (vnetp->fdb_hashp != NULL) {
810 		mod_hash_destroy_hash(vnetp->fdb_hashp);
811 		vnetp->fdb_hashp = NULL;
812 		vnetp->fdb_nchains = 0;
813 	}
814 }
815 
816 /*
817  * Add an entry into the fdb.
818  */
819 void
820 vnet_fdbe_add(vnet_t *vnetp, vnet_res_t *vresp)
821 {
822 	uint64_t	addr = 0;
823 	int		rv;
824 
825 	KEY_HASH(addr, vresp->rem_macaddr);
826 
827 	/*
828 	 * If the entry being added corresponds to LDC_SERVICE resource,
829 	 * that is, vswitch connection, it is added to the hash and also
830 	 * the entry is cached, an additional reference count reflects
831 	 * this. The HYBRID resource is not added to the hash, but only
832 	 * cached, as it is only used for sending out packets for unknown
833 	 * unicast destinations.
834 	 */
835 	(vresp->type == VIO_NET_RES_LDC_SERVICE) ?
836 	    (vresp->refcnt = 1) : (vresp->refcnt = 0);
837 
838 	/*
839 	 * Note: duplicate keys will be rejected by mod_hash.
840 	 */
841 	if (vresp->type != VIO_NET_RES_HYBRID) {
842 		rv = mod_hash_insert(vnetp->fdb_hashp, (mod_hash_key_t)addr,
843 		    (mod_hash_val_t)vresp);
844 		if (rv != 0) {
845 			DWARN(vnetp, "Duplicate macaddr key(%lx)\n", addr);
846 			return;
847 		}
848 	}
849 
850 	if (vresp->type == VIO_NET_RES_LDC_SERVICE) {
851 		/* Cache the fdb entry to vsw-port */
852 		WRITE_ENTER(&vnetp->vsw_fp_rw);
853 		if (vnetp->vsw_fp == NULL)
854 			vnetp->vsw_fp = vresp;
855 		RW_EXIT(&vnetp->vsw_fp_rw);
856 	} else if (vresp->type == VIO_NET_RES_HYBRID) {
857 		/* Cache the fdb entry to hybrid resource */
858 		WRITE_ENTER(&vnetp->vsw_fp_rw);
859 		if (vnetp->hio_fp == NULL)
860 			vnetp->hio_fp = vresp;
861 		RW_EXIT(&vnetp->vsw_fp_rw);
862 	}
863 }
864 
865 /*
866  * Remove an entry from fdb.
867  */
868 static void
869 vnet_fdbe_del(vnet_t *vnetp, vnet_res_t *vresp)
870 {
871 	uint64_t	addr = 0;
872 	int		rv;
873 	uint32_t	refcnt;
874 	vnet_res_t	*tmp;
875 
876 	KEY_HASH(addr, vresp->rem_macaddr);
877 
878 	/*
879 	 * Remove the entry from fdb hash table.
880 	 * This prevents further references to this fdb entry.
881 	 */
882 	if (vresp->type != VIO_NET_RES_HYBRID) {
883 		rv = mod_hash_remove(vnetp->fdb_hashp, (mod_hash_key_t)addr,
884 		    (mod_hash_val_t *)&tmp);
885 		if (rv != 0) {
886 			/*
887 			 * As the resources are added to the hash only
888 			 * after they are started, this can occur if
889 			 * a resource unregisters before it is ever started.
890 			 */
891 			return;
892 		}
893 	}
894 
895 	if (vresp->type == VIO_NET_RES_LDC_SERVICE) {
896 		WRITE_ENTER(&vnetp->vsw_fp_rw);
897 
898 		ASSERT(tmp == vnetp->vsw_fp);
899 		vnetp->vsw_fp = NULL;
900 
901 		RW_EXIT(&vnetp->vsw_fp_rw);
902 	} else if (vresp->type == VIO_NET_RES_HYBRID) {
903 		WRITE_ENTER(&vnetp->vsw_fp_rw);
904 
905 		vnetp->hio_fp = NULL;
906 
907 		RW_EXIT(&vnetp->vsw_fp_rw);
908 	}
909 
910 	/*
911 	 * If there are threads already ref holding before the entry was
912 	 * removed from hash table, then wait for ref count to drop to zero.
913 	 */
914 	(vresp->type == VIO_NET_RES_LDC_SERVICE) ?
915 	    (refcnt = 1) : (refcnt = 0);
916 	while (vresp->refcnt > refcnt) {
917 		delay(drv_usectohz(vnet_fdbe_refcnt_delay));
918 	}
919 }
920 
921 /*
922  * Search fdb for a given mac address. If an entry is found, hold
923  * a reference to it and return the entry; else returns NULL.
924  */
925 static vnet_res_t *
926 vnet_fdbe_find(vnet_t *vnetp, struct ether_addr *addrp)
927 {
928 	uint64_t	key = 0;
929 	vnet_res_t	*vresp;
930 	int		rv;
931 
932 	KEY_HASH(key, addrp->ether_addr_octet);
933 
934 	rv = mod_hash_find_cb(vnetp->fdb_hashp, (mod_hash_key_t)key,
935 	    (mod_hash_val_t *)&vresp, vnet_fdbe_find_cb);
936 
937 	if (rv != 0)
938 		return (NULL);
939 
940 	return (vresp);
941 }
942 
943 /*
944  * Callback function provided to mod_hash_find_cb(). After finding the fdb
945  * entry corresponding to the key (macaddr), this callback will be invoked by
946  * mod_hash_find_cb() to atomically increment the reference count on the fdb
947  * entry before returning the found entry.
948  */
949 static void
950 vnet_fdbe_find_cb(mod_hash_key_t key, mod_hash_val_t val)
951 {
952 	_NOTE(ARGUNUSED(key))
953 	VNET_FDBE_REFHOLD((vnet_res_t *)val);
954 }
955 
956 static void
957 vnet_rx(vio_net_handle_t vrh, mblk_t *mp)
958 {
959 	vnet_res_t *vresp = (vnet_res_t *)vrh;
960 	vnet_t *vnetp = vresp->vnetp;
961 
962 	if ((vnetp != NULL) && (vnetp->mh)) {
963 		mac_rx(vnetp->mh, NULL, mp);
964 	} else {
965 		freemsgchain(mp);
966 	}
967 }
968 
969 void
970 vnet_tx_update(vio_net_handle_t vrh)
971 {
972 	vnet_res_t *vresp = (vnet_res_t *)vrh;
973 	vnet_t *vnetp = vresp->vnetp;
974 
975 	if ((vnetp != NULL) && (vnetp->mh != NULL)) {
976 		mac_tx_update(vnetp->mh);
977 	}
978 }
979 
980 /*
981  * vio_net_resource_reg -- An interface called to register a resource
982  *	with vnet.
983  *	macp -- a GLDv3 mac_register that has all the details of
984  *		a resource and its callbacks etc.
985  *	type -- resource type.
986  *	local_macaddr -- resource's MAC address. This is used to
987  *			 associate a resource with a corresponding vnet.
988  *	remote_macaddr -- remote side MAC address. This is ignored for
989  *			  the Hybrid resources.
990  *	vhp -- A handle returned to the caller.
991  *	vcb -- A set of callbacks provided to the callers.
992  */
993 int vio_net_resource_reg(mac_register_t *macp, vio_net_res_type_t type,
994     ether_addr_t local_macaddr, ether_addr_t rem_macaddr, vio_net_handle_t *vhp,
995     vio_net_callbacks_t *vcb)
996 {
997 	vnet_t	*vnetp;
998 	vnet_res_t *vresp;
999 
1000 	vresp = kmem_zalloc(sizeof (vnet_res_t), KM_SLEEP);
1001 	ether_copy(local_macaddr, vresp->local_macaddr);
1002 	ether_copy(rem_macaddr, vresp->rem_macaddr);
1003 	vresp->type = type;
1004 	bcopy(macp, &vresp->macreg, sizeof (mac_register_t));
1005 
1006 	DBG1(NULL, "Resource Registerig type=0%X\n", type);
1007 
1008 	READ_ENTER(&vnet_rw);
1009 	vnetp = vnet_headp;
1010 	while (vnetp != NULL) {
1011 		if (VNET_MATCH_RES(vresp, vnetp)) {
1012 			WRITE_ENTER(&vnetp->vrwlock);
1013 			vresp->vnetp = vnetp;
1014 			vresp->nextp = vnetp->vres_list;
1015 			vnetp->vres_list = vresp;
1016 			RW_EXIT(&vnetp->vrwlock);
1017 			break;
1018 		}
1019 		vnetp = vnetp->nextp;
1020 	}
1021 	RW_EXIT(&vnet_rw);
1022 	if (vresp->vnetp == NULL) {
1023 		DWARN(NULL, "No vnet instance");
1024 		kmem_free(vresp, sizeof (vnet_res_t));
1025 		return (ENXIO);
1026 	}
1027 
1028 	*vhp = vresp;
1029 	vcb->vio_net_rx_cb = vnet_rx;
1030 	vcb->vio_net_tx_update = vnet_tx_update;
1031 	vcb->vio_net_report_err = vnet_handle_res_err;
1032 
1033 	/* Dispatch a task to start resources */
1034 	vnet_dispatch_res_task(vnetp);
1035 	return (0);
1036 }
1037 
1038 /*
1039  * vio_net_resource_unreg -- An interface to unregister a resource.
1040  */
1041 void
1042 vio_net_resource_unreg(vio_net_handle_t vhp)
1043 {
1044 	vnet_res_t *vresp = (vnet_res_t *)vhp;
1045 	vnet_t *vnetp = vresp->vnetp;
1046 	vnet_res_t *vrp;
1047 
1048 	DBG1(NULL, "Resource Registerig hdl=0x%p", vhp);
1049 
1050 	ASSERT(vnetp != NULL);
1051 	vnet_fdbe_del(vnetp, vresp);
1052 
1053 	WRITE_ENTER(&vnetp->vrwlock);
1054 	if (vresp == vnetp->vres_list) {
1055 		vnetp->vres_list = vresp->nextp;
1056 	} else {
1057 		vrp = vnetp->vres_list;
1058 		while (vrp->nextp != NULL) {
1059 			if (vrp->nextp == vresp) {
1060 				vrp->nextp = vresp->nextp;
1061 				break;
1062 			}
1063 			vrp = vrp->nextp;
1064 		}
1065 	}
1066 	vresp->vnetp = NULL;
1067 	vresp->nextp = NULL;
1068 	RW_EXIT(&vnetp->vrwlock);
1069 	KMEM_FREE(vresp);
1070 }
1071 
1072 /*
1073  * vnet_dds_rx -- an interface called by vgen to DDS messages.
1074  */
1075 void
1076 vnet_dds_rx(void *arg, void *dmsg)
1077 {
1078 	vnet_t *vnetp = arg;
1079 	vdds_process_dds_msg(vnetp, dmsg);
1080 }
1081 
1082 /*
1083  * vnet_send_dds_msg -- An interface provided to DDS to send
1084  *	DDS messages. This simply sends meessages via vgen.
1085  */
1086 int
1087 vnet_send_dds_msg(vnet_t *vnetp, void *dmsg)
1088 {
1089 	int rv;
1090 
1091 	if (vnetp->vgenhdl != NULL) {
1092 		rv = vgen_dds_tx(vnetp->vgenhdl, dmsg);
1093 	}
1094 	return (rv);
1095 }
1096 
1097 /*
1098  * vnet_handle_res_err -- A callback function called by a resource
1099  *	to report an error. For example, vgen can call to report
1100  *	an LDC down/reset event. This will trigger cleanup of associated
1101  *	Hybrid resource.
1102  */
1103 /* ARGSUSED */
1104 static void
1105 vnet_handle_res_err(vio_net_handle_t vrh, vio_net_err_val_t err)
1106 {
1107 	vnet_res_t *vresp = (vnet_res_t *)vrh;
1108 	vnet_t *vnetp = vresp->vnetp;
1109 
1110 	if (vnetp == NULL) {
1111 		return;
1112 	}
1113 	if ((vresp->type != VIO_NET_RES_LDC_SERVICE) &&
1114 	    (vresp->type != VIO_NET_RES_HYBRID)) {
1115 		return;
1116 	}
1117 	vdds_cleanup_hybrid_res(vnetp);
1118 }
1119 
1120 /*
1121  * vnet_dispatch_res_task -- A function to dispatch tasks start resources.
1122  */
1123 static void
1124 vnet_dispatch_res_task(vnet_t *vnetp)
1125 {
1126 	int rv;
1127 
1128 	WRITE_ENTER(&vnetp->vrwlock);
1129 	if (vnetp->flags & VNET_STARTED) {
1130 		rv = ddi_taskq_dispatch(vnetp->taskqp, vnet_res_start_task,
1131 		    vnetp, DDI_NOSLEEP);
1132 		if (rv != DDI_SUCCESS) {
1133 			cmn_err(CE_WARN,
1134 			    "vnet%d:Can't dispatch start resource task",
1135 			    vnetp->instance);
1136 		}
1137 	}
1138 	RW_EXIT(&vnetp->vrwlock);
1139 }
1140 
1141 /*
1142  * vnet_res_start_task -- A taskq callback function that starts a resource.
1143  */
1144 static void
1145 vnet_res_start_task(void *arg)
1146 {
1147 	vnet_t *vnetp = arg;
1148 
1149 	WRITE_ENTER(&vnetp->vrwlock);
1150 	if (vnetp->flags & VNET_STARTED) {
1151 		vnet_start_resources(vnetp);
1152 	}
1153 	RW_EXIT(&vnetp->vrwlock);
1154 }
1155 
1156 /*
1157  * vnet_start_resources -- starts all resources associated with
1158  *	a vnet.
1159  */
1160 static void
1161 vnet_start_resources(vnet_t *vnetp)
1162 {
1163 	mac_register_t	*macp;
1164 	mac_callbacks_t	*cbp;
1165 	vnet_res_t	*vresp;
1166 	int rv;
1167 
1168 	DBG1(vnetp, "enter\n");
1169 
1170 	for (vresp = vnetp->vres_list; vresp != NULL; vresp = vresp->nextp) {
1171 		/* skip if it is already started */
1172 		if (vresp->flags & VNET_STARTED) {
1173 			continue;
1174 		}
1175 		macp = &vresp->macreg;
1176 		cbp = macp->m_callbacks;
1177 		rv = cbp->mc_start(macp->m_driver);
1178 		if (rv == 0) {
1179 			/*
1180 			 * Successfully started the resource, so now
1181 			 * add it to the fdb.
1182 			 */
1183 			vresp->flags |= VNET_STARTED;
1184 			vnet_fdbe_add(vnetp, vresp);
1185 		}
1186 	}
1187 
1188 	DBG1(vnetp, "exit\n");
1189 
1190 }
1191 
1192 /*
1193  * vnet_stop_resources -- stop all resources associated with a vnet.
1194  */
1195 static void
1196 vnet_stop_resources(vnet_t *vnetp)
1197 {
1198 	vnet_res_t	*vresp;
1199 	vnet_res_t	*nvresp;
1200 	mac_register_t	*macp;
1201 	mac_callbacks_t	*cbp;
1202 
1203 	DBG1(vnetp, "enter\n");
1204 
1205 	for (vresp = vnetp->vres_list; vresp != NULL; ) {
1206 		nvresp = vresp->nextp;
1207 		if (vresp->flags & VNET_STARTED) {
1208 			macp = &vresp->macreg;
1209 			cbp = macp->m_callbacks;
1210 			cbp->mc_stop(macp->m_driver);
1211 			vresp->flags &= ~VNET_STARTED;
1212 		}
1213 		vresp = nvresp;
1214 	}
1215 	DBG1(vnetp, "exit\n");
1216 }
1217