xref: /illumos-gate/usr/src/uts/sun4v/io/vnet.c (revision f6f4cb8ada400367a1921f6b93fb9e02f53ac5e6)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #include <sys/types.h>
28 #include <sys/errno.h>
29 #include <sys/param.h>
30 #include <sys/stream.h>
31 #include <sys/kmem.h>
32 #include <sys/conf.h>
33 #include <sys/devops.h>
34 #include <sys/ksynch.h>
35 #include <sys/stat.h>
36 #include <sys/modctl.h>
37 #include <sys/modhash.h>
38 #include <sys/debug.h>
39 #include <sys/ethernet.h>
40 #include <sys/dlpi.h>
41 #include <net/if.h>
42 #include <sys/mac.h>
43 #include <sys/mac_ether.h>
44 #include <sys/ddi.h>
45 #include <sys/sunddi.h>
46 #include <sys/strsun.h>
47 #include <sys/note.h>
48 #include <sys/atomic.h>
49 #include <sys/vnet.h>
50 #include <sys/vlan.h>
51 #include <sys/vnet_mailbox.h>
52 #include <sys/vnet_common.h>
53 #include <sys/dds.h>
54 #include <sys/strsubr.h>
55 #include <sys/taskq.h>
56 
57 /*
58  * Function prototypes.
59  */
60 
61 /* DDI entrypoints */
62 static int vnetdevinfo(dev_info_t *, ddi_info_cmd_t, void *, void **);
63 static int vnetattach(dev_info_t *, ddi_attach_cmd_t);
64 static int vnetdetach(dev_info_t *, ddi_detach_cmd_t);
65 
66 /* MAC entrypoints  */
67 static int vnet_m_stat(void *, uint_t, uint64_t *);
68 static int vnet_m_start(void *);
69 static void vnet_m_stop(void *);
70 static int vnet_m_promisc(void *, boolean_t);
71 static int vnet_m_multicst(void *, boolean_t, const uint8_t *);
72 static int vnet_m_unicst(void *, const uint8_t *);
73 mblk_t *vnet_m_tx(void *, mblk_t *);
74 
75 /* vnet internal functions */
76 static int vnet_mac_register(vnet_t *);
77 static int vnet_read_mac_address(vnet_t *vnetp);
78 
79 /* Forwarding database (FDB) routines */
80 static void vnet_fdb_create(vnet_t *vnetp);
81 static void vnet_fdb_destroy(vnet_t *vnetp);
82 static vnet_res_t *vnet_fdbe_find(vnet_t *vnetp, struct ether_addr *addrp);
83 static void vnet_fdbe_find_cb(mod_hash_key_t key, mod_hash_val_t val);
84 void vnet_fdbe_add(vnet_t *vnetp, vnet_res_t *vresp);
85 static void vnet_fdbe_del(vnet_t *vnetp, vnet_res_t *vresp);
86 
87 static void vnet_rx(vio_net_handle_t vrh, mblk_t *mp);
88 static void vnet_tx_update(vio_net_handle_t vrh);
89 static void vnet_res_start_task(void *arg);
90 static void vnet_start_resources(vnet_t *vnetp);
91 static void vnet_stop_resources(vnet_t *vnetp);
92 static void vnet_dispatch_res_task(vnet_t *vnetp);
93 static void vnet_res_start_task(void *arg);
94 static void vnet_handle_res_err(vio_net_handle_t vrh, vio_net_err_val_t err);
95 int vnet_mtu_update(vnet_t *vnetp, uint32_t mtu);
96 
97 /* Exported to to vnet_dds */
98 int vnet_send_dds_msg(vnet_t *vnetp, void *dmsg);
99 
100 /* Externs that are imported from vnet_gen */
101 extern int vgen_init(void *vnetp, uint64_t regprop, dev_info_t *vnetdip,
102     const uint8_t *macaddr, void **vgenhdl);
103 extern int vgen_uninit(void *arg);
104 extern int vgen_dds_tx(void *arg, void *dmsg);
105 
106 /* Externs that are imported from vnet_dds */
107 extern void vdds_mod_init(void);
108 extern void vdds_mod_fini(void);
109 extern int vdds_init(vnet_t *vnetp);
110 extern void vdds_cleanup(vnet_t *vnetp);
111 extern void vdds_process_dds_msg(vnet_t *vnetp, vio_dds_msg_t *dmsg);
112 extern void vdds_cleanup_hybrid_res(vnet_t *vnetp);
113 
/*
 * Take a reference on an fdb entry (p points to a structure with a 32-bit
 * refcnt member). Wrapped in do/while (0) so the macro expands to a single
 * statement and is safe in an unbraced if/else.
 */
#define	VNET_FDBE_REFHOLD(p)						\
do {									\
	atomic_inc_32(&(p)->refcnt);					\
	ASSERT((p)->refcnt != 0);					\
} while (0)
119 
/*
 * Drop a reference on an fdb entry previously taken with the matching hold.
 * Wrapped in do/while (0) so the macro expands to a single statement and is
 * safe in an unbraced if/else.
 */
#define	VNET_FDBE_REFRELE(p)						\
do {									\
	ASSERT((p)->refcnt != 0);					\
	atomic_dec_32(&(p)->refcnt);					\
} while (0)
125 
126 static mac_callbacks_t vnet_m_callbacks = {
127 	0,
128 	vnet_m_stat,
129 	vnet_m_start,
130 	vnet_m_stop,
131 	vnet_m_promisc,
132 	vnet_m_multicst,
133 	vnet_m_unicst,
134 	vnet_m_tx,
135 	NULL,
136 	NULL,
137 	NULL
138 };
139 
140 /*
141  * Linked list of "vnet_t" structures - one per instance.
142  */
143 static vnet_t	*vnet_headp = NULL;
144 static krwlock_t vnet_rw;
145 
146 /* Tunables */
147 uint32_t vnet_ntxds = VNET_NTXDS;	/* power of 2 transmit descriptors */
148 uint32_t vnet_ldcwd_interval = VNET_LDCWD_INTERVAL; /* watchdog freq in msec */
149 uint32_t vnet_ldcwd_txtimeout = VNET_LDCWD_TXTIMEOUT;  /* tx timeout in msec */
150 uint32_t vnet_ldc_mtu = VNET_LDC_MTU;		/* ldc mtu */
151 
152 /*
153  * Set this to non-zero to enable additional internal receive buffer pools
154  * based on the MTU of the device for better performance at the cost of more
155  * memory consumption. This is turned off by default, to use allocb(9F) for
156  * receive buffer allocations of sizes > 2K.
157  */
158 boolean_t vnet_jumbo_rxpools = B_FALSE;
159 
160 /* # of chains in fdb hash table */
161 uint32_t	vnet_fdb_nchains = VNET_NFDB_HASH;
162 
163 /* Internal tunables */
164 uint32_t	vnet_ethermtu = 1500;	/* mtu of the device */
165 
166 /*
167  * Default vlan id. This is only used internally when the "default-vlan-id"
168  * property is not present in the MD device node. Therefore, this should not be
169  * used as a tunable; if this value is changed, the corresponding variable
170  * should be updated to the same value in vsw and also other vnets connected to
171  * the same vsw.
172  */
173 uint16_t	vnet_default_vlan_id = 1;
174 
175 /* delay in usec to wait for all references on a fdb entry to be dropped */
176 uint32_t vnet_fdbe_refcnt_delay = 10;
177 
178 static struct ether_addr etherbroadcastaddr = {
179 	0xff, 0xff, 0xff, 0xff, 0xff, 0xff
180 };
181 
182 
183 /*
184  * Property names
185  */
186 static char macaddr_propname[] = "local-mac-address";
187 
188 /*
189  * This is the string displayed by modinfo(1m).
190  */
191 static char vnet_ident[] = "vnet driver";
192 extern struct mod_ops mod_driverops;
193 static struct cb_ops cb_vnetops = {
194 	nulldev,		/* cb_open */
195 	nulldev,		/* cb_close */
196 	nodev,			/* cb_strategy */
197 	nodev,			/* cb_print */
198 	nodev,			/* cb_dump */
199 	nodev,			/* cb_read */
200 	nodev,			/* cb_write */
201 	nodev,			/* cb_ioctl */
202 	nodev,			/* cb_devmap */
203 	nodev,			/* cb_mmap */
204 	nodev,			/* cb_segmap */
205 	nochpoll,		/* cb_chpoll */
206 	ddi_prop_op,		/* cb_prop_op */
207 	NULL,			/* cb_stream */
208 	(int)(D_MP)		/* cb_flag */
209 };
210 
211 static struct dev_ops vnetops = {
212 	DEVO_REV,		/* devo_rev */
213 	0,			/* devo_refcnt */
214 	NULL,			/* devo_getinfo */
215 	nulldev,		/* devo_identify */
216 	nulldev,		/* devo_probe */
217 	vnetattach,		/* devo_attach */
218 	vnetdetach,		/* devo_detach */
219 	nodev,			/* devo_reset */
220 	&cb_vnetops,		/* devo_cb_ops */
221 	(struct bus_ops *)NULL	/* devo_bus_ops */
222 };
223 
224 static struct modldrv modldrv = {
225 	&mod_driverops,		/* Type of module.  This one is a driver */
226 	vnet_ident,		/* ID string */
227 	&vnetops		/* driver specific ops */
228 };
229 
230 static struct modlinkage modlinkage = {
231 	MODREV_1, (void *)&modldrv, NULL
232 };
233 
#ifdef DEBUG

/*
 * Print debug messages - set to 0xf to enable all msgs
 */
int vnet_dbglevel = 0x8;

/*
 * Format a debug message and log it with cmn_err(CE_CONT).
 *
 *	fname -- name of the calling function; used as the message prefix.
 *	arg   -- the vnet_t the message is about, or NULL. When non-NULL the
 *		 prefix also carries the instance number.
 *	fmt   -- printf-style format for the remaining arguments.
 *
 * The message is assembled in a fixed-size stack buffer. All formatting is
 * bounded by the buffer size, so an over-long message is truncated instead
 * of being written past the end of the buffer.
 */
static void
debug_printf(const char *fname, void *arg, const char *fmt, ...)
{
	char	buf[512];
	va_list	ap;
	vnet_t	*vnetp = (vnet_t *)arg;
	size_t	len;

	if (vnetp == NULL) {
		(void) snprintf(buf, sizeof (buf), "%s: ", fname);
	} else {
		(void) snprintf(buf, sizeof (buf), "vnet%d:%s: ",
		    vnetp->instance, fname);
	}
	/* the prefix is always NUL terminated, so len < sizeof (buf) */
	len = strlen(buf);

	va_start(ap, fmt);
	(void) vsnprintf(buf + len, sizeof (buf) - len, fmt, ap);
	va_end(ap);
	cmn_err(CE_CONT, "%s\n", buf);
}

#endif
263 
264 /* _init(9E): initialize the loadable module */
265 int
266 _init(void)
267 {
268 	int status;
269 
270 	DBG1(NULL, "enter\n");
271 
272 	mac_init_ops(&vnetops, "vnet");
273 	status = mod_install(&modlinkage);
274 	if (status != 0) {
275 		mac_fini_ops(&vnetops);
276 	}
277 	vdds_mod_init();
278 	DBG1(NULL, "exit(%d)\n", status);
279 	return (status);
280 }
281 
282 /* _fini(9E): prepare the module for unloading. */
283 int
284 _fini(void)
285 {
286 	int status;
287 
288 	DBG1(NULL, "enter\n");
289 
290 	status = mod_remove(&modlinkage);
291 	if (status != 0)
292 		return (status);
293 	mac_fini_ops(&vnetops);
294 	vdds_mod_fini();
295 
296 	DBG1(NULL, "exit(%d)\n", status);
297 	return (status);
298 }
299 
300 /* _info(9E): return information about the loadable module */
301 int
302 _info(struct modinfo *modinfop)
303 {
304 	return (mod_info(&modlinkage, modinfop));
305 }
306 
307 /*
308  * attach(9E): attach a device to the system.
309  * called once for each instance of the device on the system.
310  */
311 static int
312 vnetattach(dev_info_t *dip, ddi_attach_cmd_t cmd)
313 {
314 	vnet_t		*vnetp;
315 	int		status;
316 	int		instance;
317 	uint64_t	reg;
318 	char		qname[TASKQ_NAMELEN];
319 	enum	{ AST_init = 0x0, AST_vnet_alloc = 0x1,
320 		AST_mac_alloc = 0x2, AST_read_macaddr = 0x4,
321 		AST_vgen_init = 0x8, AST_fdbh_alloc = 0x10,
322 		AST_vdds_init = 0x20, AST_taskq_create = 0x40,
323 		AST_vnet_list = 0x80 } attach_state;
324 
325 	attach_state = AST_init;
326 
327 	switch (cmd) {
328 	case DDI_ATTACH:
329 		break;
330 	case DDI_RESUME:
331 	case DDI_PM_RESUME:
332 	default:
333 		goto vnet_attach_fail;
334 	}
335 
336 	instance = ddi_get_instance(dip);
337 	DBG1(NULL, "instance(%d) enter\n", instance);
338 
339 	/* allocate vnet_t and mac_t structures */
340 	vnetp = kmem_zalloc(sizeof (vnet_t), KM_SLEEP);
341 	vnetp->dip = dip;
342 	vnetp->instance = instance;
343 	rw_init(&vnetp->vrwlock, NULL, RW_DRIVER, NULL);
344 	rw_init(&vnetp->vsw_fp_rw, NULL, RW_DRIVER, NULL);
345 	attach_state |= AST_vnet_alloc;
346 
347 	status = vdds_init(vnetp);
348 	if (status != 0) {
349 		goto vnet_attach_fail;
350 	}
351 	attach_state |= AST_vdds_init;
352 
353 	/* setup links to vnet_t from both devinfo and mac_t */
354 	ddi_set_driver_private(dip, (caddr_t)vnetp);
355 
356 	/* read the mac address */
357 	status = vnet_read_mac_address(vnetp);
358 	if (status != DDI_SUCCESS) {
359 		goto vnet_attach_fail;
360 	}
361 	attach_state |= AST_read_macaddr;
362 
363 	reg = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
364 	    DDI_PROP_DONTPASS, "reg", -1);
365 	if (reg == -1) {
366 		goto vnet_attach_fail;
367 	}
368 	vnetp->reg = reg;
369 
370 	vnet_fdb_create(vnetp);
371 	attach_state |= AST_fdbh_alloc;
372 
373 	(void) snprintf(qname, TASKQ_NAMELEN, "vnet_taskq%d", instance);
374 	if ((vnetp->taskqp = ddi_taskq_create(dip, qname, 1,
375 	    TASKQ_DEFAULTPRI, 0)) == NULL) {
376 		cmn_err(CE_WARN, "!vnet%d: Unable to create task queue",
377 		    instance);
378 		goto vnet_attach_fail;
379 	}
380 	attach_state |= AST_taskq_create;
381 
382 	/* add to the list of vnet devices */
383 	WRITE_ENTER(&vnet_rw);
384 	vnetp->nextp = vnet_headp;
385 	vnet_headp = vnetp;
386 	RW_EXIT(&vnet_rw);
387 
388 	attach_state |= AST_vnet_list;
389 
390 	/*
391 	 * Initialize the generic vnet plugin which provides
392 	 * communication via sun4v LDC (logical domain channel) based
393 	 * resources. It will register the LDC resources as and when
394 	 * they become available.
395 	 */
396 	status = vgen_init(vnetp, reg, vnetp->dip,
397 	    (uint8_t *)vnetp->curr_macaddr, &vnetp->vgenhdl);
398 	if (status != DDI_SUCCESS) {
399 		DERR(vnetp, "vgen_init() failed\n");
400 		goto vnet_attach_fail;
401 	}
402 	attach_state |= AST_vgen_init;
403 
404 	/* register with MAC layer */
405 	status = vnet_mac_register(vnetp);
406 	if (status != DDI_SUCCESS) {
407 		goto vnet_attach_fail;
408 	}
409 
410 	DBG1(NULL, "instance(%d) exit\n", instance);
411 	return (DDI_SUCCESS);
412 
413 vnet_attach_fail:
414 
415 	if (attach_state & AST_vnet_list) {
416 		vnet_t		**vnetpp;
417 		/* unlink from instance(vnet_t) list */
418 		WRITE_ENTER(&vnet_rw);
419 		for (vnetpp = &vnet_headp; *vnetpp;
420 		    vnetpp = &(*vnetpp)->nextp) {
421 			if (*vnetpp == vnetp) {
422 				*vnetpp = vnetp->nextp;
423 				break;
424 			}
425 		}
426 		RW_EXIT(&vnet_rw);
427 	}
428 
429 	if (attach_state & AST_vdds_init) {
430 		vdds_cleanup(vnetp);
431 	}
432 	if (attach_state & AST_taskq_create) {
433 		ddi_taskq_destroy(vnetp->taskqp);
434 	}
435 	if (attach_state & AST_fdbh_alloc) {
436 		vnet_fdb_destroy(vnetp);
437 	}
438 	if (attach_state & AST_vgen_init) {
439 		(void) vgen_uninit(vnetp->vgenhdl);
440 	}
441 	if (attach_state & AST_vnet_alloc) {
442 		rw_destroy(&vnetp->vrwlock);
443 		rw_destroy(&vnetp->vsw_fp_rw);
444 		KMEM_FREE(vnetp);
445 	}
446 	return (DDI_FAILURE);
447 }
448 
449 /*
450  * detach(9E): detach a device from the system.
451  */
452 static int
453 vnetdetach(dev_info_t *dip, ddi_detach_cmd_t cmd)
454 {
455 	vnet_t		*vnetp;
456 	vnet_t		**vnetpp;
457 	int		instance;
458 	int		rv;
459 
460 	instance = ddi_get_instance(dip);
461 	DBG1(NULL, "instance(%d) enter\n", instance);
462 
463 	vnetp = ddi_get_driver_private(dip);
464 	if (vnetp == NULL) {
465 		goto vnet_detach_fail;
466 	}
467 
468 	switch (cmd) {
469 	case DDI_DETACH:
470 		break;
471 	case DDI_SUSPEND:
472 	case DDI_PM_SUSPEND:
473 	default:
474 		goto vnet_detach_fail;
475 	}
476 
477 	(void) vdds_cleanup(vnetp);
478 	rv = vgen_uninit(vnetp->vgenhdl);
479 	if (rv != DDI_SUCCESS) {
480 		goto vnet_detach_fail;
481 	}
482 
483 	/*
484 	 * Unregister from the MAC subsystem.  This can fail, in
485 	 * particular if there are DLPI style-2 streams still open -
486 	 * in which case we just return failure.
487 	 */
488 	if (mac_unregister(vnetp->mh) != 0)
489 		goto vnet_detach_fail;
490 
491 	/* unlink from instance(vnet_t) list */
492 	WRITE_ENTER(&vnet_rw);
493 	for (vnetpp = &vnet_headp; *vnetpp; vnetpp = &(*vnetpp)->nextp) {
494 		if (*vnetpp == vnetp) {
495 			*vnetpp = vnetp->nextp;
496 			break;
497 		}
498 	}
499 	RW_EXIT(&vnet_rw);
500 
501 	ddi_taskq_destroy(vnetp->taskqp);
502 	/* destroy fdb */
503 	vnet_fdb_destroy(vnetp);
504 
505 	rw_destroy(&vnetp->vrwlock);
506 	rw_destroy(&vnetp->vsw_fp_rw);
507 	KMEM_FREE(vnetp);
508 
509 	return (DDI_SUCCESS);
510 
511 vnet_detach_fail:
512 	return (DDI_FAILURE);
513 }
514 
515 /* enable the device for transmit/receive */
516 static int
517 vnet_m_start(void *arg)
518 {
519 	vnet_t		*vnetp = arg;
520 
521 	DBG1(vnetp, "enter\n");
522 
523 	WRITE_ENTER(&vnetp->vrwlock);
524 	vnetp->flags |= VNET_STARTED;
525 	vnet_start_resources(vnetp);
526 	RW_EXIT(&vnetp->vrwlock);
527 
528 	DBG1(vnetp, "exit\n");
529 	return (VNET_SUCCESS);
530 
531 }
532 
533 /* stop transmit/receive for the device */
534 static void
535 vnet_m_stop(void *arg)
536 {
537 	vnet_t		*vnetp = arg;
538 
539 	DBG1(vnetp, "enter\n");
540 
541 	WRITE_ENTER(&vnetp->vrwlock);
542 	if (vnetp->flags & VNET_STARTED) {
543 		vnet_stop_resources(vnetp);
544 		vnetp->flags &= ~VNET_STARTED;
545 	}
546 	RW_EXIT(&vnetp->vrwlock);
547 
548 	DBG1(vnetp, "exit\n");
549 }
550 
551 /* set the unicast mac address of the device */
552 static int
553 vnet_m_unicst(void *arg, const uint8_t *macaddr)
554 {
555 	_NOTE(ARGUNUSED(macaddr))
556 
557 	vnet_t *vnetp = arg;
558 
559 	DBG1(vnetp, "enter\n");
560 	/*
561 	 * NOTE: setting mac address dynamically is not supported.
562 	 */
563 	DBG1(vnetp, "exit\n");
564 
565 	return (VNET_FAILURE);
566 }
567 
568 /* enable/disable a multicast address */
569 static int
570 vnet_m_multicst(void *arg, boolean_t add, const uint8_t *mca)
571 {
572 	_NOTE(ARGUNUSED(add, mca))
573 
574 	vnet_t *vnetp = arg;
575 	vnet_res_t	*vresp;
576 	mac_register_t	*macp;
577 	mac_callbacks_t	*cbp;
578 	int rv = VNET_SUCCESS;
579 
580 	DBG1(vnetp, "enter\n");
581 
582 	READ_ENTER(&vnetp->vrwlock);
583 	for (vresp = vnetp->vres_list; vresp != NULL; vresp = vresp->nextp) {
584 		if (vresp->type == VIO_NET_RES_LDC_SERVICE) {
585 			macp = &vresp->macreg;
586 			cbp = macp->m_callbacks;
587 			rv = cbp->mc_multicst(macp->m_driver, add, mca);
588 		}
589 	}
590 	RW_EXIT(&vnetp->vrwlock);
591 
592 	DBG1(vnetp, "exit(%d)\n", rv);
593 	return (rv);
594 }
595 
596 /* set or clear promiscuous mode on the device */
597 static int
598 vnet_m_promisc(void *arg, boolean_t on)
599 {
600 	_NOTE(ARGUNUSED(on))
601 
602 	vnet_t *vnetp = arg;
603 	DBG1(vnetp, "enter\n");
604 	/*
605 	 * NOTE: setting promiscuous mode is not supported, just return success.
606 	 */
607 	DBG1(vnetp, "exit\n");
608 	return (VNET_SUCCESS);
609 }
610 
611 /*
612  * Transmit a chain of packets. This function provides switching functionality
613  * based on the destination mac address to reach other guests (within ldoms) or
614  * external hosts.
615  */
616 mblk_t *
617 vnet_m_tx(void *arg, mblk_t *mp)
618 {
619 	vnet_t			*vnetp;
620 	vnet_res_t		*vresp;
621 	mblk_t			*next;
622 	mblk_t			*resid_mp;
623 	mac_register_t		*macp;
624 	struct ether_header	*ehp;
625 	boolean_t		is_unicast;
626 
627 	vnetp = (vnet_t *)arg;
628 	DBG1(vnetp, "enter\n");
629 	ASSERT(mp != NULL);
630 
631 	while (mp != NULL) {
632 
633 		next = mp->b_next;
634 		mp->b_next = NULL;
635 
636 		/*
637 		 * Find fdb entry for the destination
638 		 * and hold a reference to it.
639 		 */
640 		ehp = (struct ether_header *)mp->b_rptr;
641 		vresp = vnet_fdbe_find(vnetp, &ehp->ether_dhost);
642 		if (vresp != NULL) {
643 
644 			/*
645 			 * Destination found in FDB.
646 			 * The destination is a vnet device within ldoms
647 			 * and directly reachable, invoke the tx function
648 			 * in the fdb entry.
649 			 */
650 			macp = &vresp->macreg;
651 			resid_mp = macp->m_callbacks->mc_tx(macp->m_driver, mp);
652 
653 			/* tx done; now release ref on fdb entry */
654 			VNET_FDBE_REFRELE(vresp);
655 
656 			if (resid_mp != NULL) {
657 				/* m_tx failed */
658 				mp->b_next = next;
659 				break;
660 			}
661 		} else {
662 			is_unicast = !(IS_BROADCAST(ehp) ||
663 			    (IS_MULTICAST(ehp)));
664 			/*
665 			 * Destination is not in FDB.
666 			 * If the destination is broadcast or multicast,
667 			 * then forward the packet to vswitch.
668 			 * If a Hybrid resource avilable, then send the
669 			 * unicast packet via hybrid resource, otherwise
670 			 * forward it to vswitch.
671 			 */
672 			READ_ENTER(&vnetp->vsw_fp_rw);
673 
674 			if ((is_unicast) && (vnetp->hio_fp != NULL)) {
675 				vresp = vnetp->hio_fp;
676 			} else {
677 				vresp = vnetp->vsw_fp;
678 			}
679 			if (vresp == NULL) {
680 				/*
681 				 * no fdb entry to vsw? drop the packet.
682 				 */
683 				RW_EXIT(&vnetp->vsw_fp_rw);
684 				freemsg(mp);
685 				mp = next;
686 				continue;
687 			}
688 
689 			/* ref hold the fdb entry to vsw */
690 			VNET_FDBE_REFHOLD(vresp);
691 
692 			RW_EXIT(&vnetp->vsw_fp_rw);
693 
694 			macp = &vresp->macreg;
695 			resid_mp = macp->m_callbacks->mc_tx(macp->m_driver, mp);
696 
697 			/* tx done; now release ref on fdb entry */
698 			VNET_FDBE_REFRELE(vresp);
699 
700 			if (resid_mp != NULL) {
701 				/* m_tx failed */
702 				mp->b_next = next;
703 				break;
704 			}
705 		}
706 
707 		mp = next;
708 	}
709 
710 	DBG1(vnetp, "exit\n");
711 	return (mp);
712 }
713 
714 /* get statistics from the device */
715 int
716 vnet_m_stat(void *arg, uint_t stat, uint64_t *val)
717 {
718 	vnet_t *vnetp = arg;
719 	vnet_res_t	*vresp;
720 	mac_register_t	*macp;
721 	mac_callbacks_t	*cbp;
722 	uint64_t val_total = 0;
723 
724 	DBG1(vnetp, "enter\n");
725 
726 	/*
727 	 * get the specified statistic from each transport and return the
728 	 * aggregate val.  This obviously only works for counters.
729 	 */
730 	if ((IS_MAC_STAT(stat) && !MAC_STAT_ISACOUNTER(stat)) ||
731 	    (IS_MACTYPE_STAT(stat) && !ETHER_STAT_ISACOUNTER(stat))) {
732 		return (ENOTSUP);
733 	}
734 
735 	READ_ENTER(&vnetp->vrwlock);
736 	for (vresp = vnetp->vres_list; vresp != NULL; vresp = vresp->nextp) {
737 		macp = &vresp->macreg;
738 		cbp = macp->m_callbacks;
739 		if (cbp->mc_getstat(macp->m_driver, stat, val) == 0)
740 			val_total += *val;
741 	}
742 	RW_EXIT(&vnetp->vrwlock);
743 
744 	*val = val_total;
745 
746 	DBG1(vnetp, "exit\n");
747 	return (0);
748 }
749 
750 /* wrapper function for mac_register() */
751 static int
752 vnet_mac_register(vnet_t *vnetp)
753 {
754 	mac_register_t	*macp;
755 	int		err;
756 
757 	if ((macp = mac_alloc(MAC_VERSION)) == NULL)
758 		return (DDI_FAILURE);
759 	macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
760 	macp->m_driver = vnetp;
761 	macp->m_dip = vnetp->dip;
762 	macp->m_src_addr = vnetp->curr_macaddr;
763 	macp->m_callbacks = &vnet_m_callbacks;
764 	macp->m_min_sdu = 0;
765 	macp->m_max_sdu = vnetp->mtu;
766 	macp->m_margin = VLAN_TAGSZ;
767 
768 	/*
769 	 * Finally, we're ready to register ourselves with the MAC layer
770 	 * interface; if this succeeds, we're all ready to start()
771 	 */
772 	err = mac_register(macp, &vnetp->mh);
773 	mac_free(macp);
774 	return (err == 0 ? DDI_SUCCESS : DDI_FAILURE);
775 }
776 
777 /* read the mac address of the device */
778 static int
779 vnet_read_mac_address(vnet_t *vnetp)
780 {
781 	uchar_t 	*macaddr;
782 	uint32_t 	size;
783 	int 		rv;
784 
785 	rv = ddi_prop_lookup_byte_array(DDI_DEV_T_ANY, vnetp->dip,
786 	    DDI_PROP_DONTPASS, macaddr_propname, &macaddr, &size);
787 	if ((rv != DDI_PROP_SUCCESS) || (size != ETHERADDRL)) {
788 		DWARN(vnetp, "prop_lookup failed(%s) err(%d)\n",
789 		    macaddr_propname, rv);
790 		return (DDI_FAILURE);
791 	}
792 	bcopy(macaddr, (caddr_t)vnetp->vendor_addr, ETHERADDRL);
793 	bcopy(macaddr, (caddr_t)vnetp->curr_macaddr, ETHERADDRL);
794 	ddi_prop_free(macaddr);
795 
796 	return (DDI_SUCCESS);
797 }
798 
799 static void
800 vnet_fdb_create(vnet_t *vnetp)
801 {
802 	char		hashname[MAXNAMELEN];
803 
804 	(void) snprintf(hashname, MAXNAMELEN, "vnet%d-fdbhash",
805 	    vnetp->instance);
806 	vnetp->fdb_nchains = vnet_fdb_nchains;
807 	vnetp->fdb_hashp = mod_hash_create_ptrhash(hashname, vnetp->fdb_nchains,
808 	    mod_hash_null_valdtor, sizeof (void *));
809 }
810 
811 static void
812 vnet_fdb_destroy(vnet_t *vnetp)
813 {
814 	/* destroy fdb-hash-table */
815 	if (vnetp->fdb_hashp != NULL) {
816 		mod_hash_destroy_hash(vnetp->fdb_hashp);
817 		vnetp->fdb_hashp = NULL;
818 		vnetp->fdb_nchains = 0;
819 	}
820 }
821 
822 /*
823  * Add an entry into the fdb.
824  */
825 void
826 vnet_fdbe_add(vnet_t *vnetp, vnet_res_t *vresp)
827 {
828 	uint64_t	addr = 0;
829 	int		rv;
830 
831 	KEY_HASH(addr, vresp->rem_macaddr);
832 
833 	/*
834 	 * If the entry being added corresponds to LDC_SERVICE resource,
835 	 * that is, vswitch connection, it is added to the hash and also
836 	 * the entry is cached, an additional reference count reflects
837 	 * this. The HYBRID resource is not added to the hash, but only
838 	 * cached, as it is only used for sending out packets for unknown
839 	 * unicast destinations.
840 	 */
841 	(vresp->type == VIO_NET_RES_LDC_SERVICE) ?
842 	    (vresp->refcnt = 1) : (vresp->refcnt = 0);
843 
844 	/*
845 	 * Note: duplicate keys will be rejected by mod_hash.
846 	 */
847 	if (vresp->type != VIO_NET_RES_HYBRID) {
848 		rv = mod_hash_insert(vnetp->fdb_hashp, (mod_hash_key_t)addr,
849 		    (mod_hash_val_t)vresp);
850 		if (rv != 0) {
851 			DWARN(vnetp, "Duplicate macaddr key(%lx)\n", addr);
852 			return;
853 		}
854 	}
855 
856 	if (vresp->type == VIO_NET_RES_LDC_SERVICE) {
857 		/* Cache the fdb entry to vsw-port */
858 		WRITE_ENTER(&vnetp->vsw_fp_rw);
859 		if (vnetp->vsw_fp == NULL)
860 			vnetp->vsw_fp = vresp;
861 		RW_EXIT(&vnetp->vsw_fp_rw);
862 	} else if (vresp->type == VIO_NET_RES_HYBRID) {
863 		/* Cache the fdb entry to hybrid resource */
864 		WRITE_ENTER(&vnetp->vsw_fp_rw);
865 		if (vnetp->hio_fp == NULL)
866 			vnetp->hio_fp = vresp;
867 		RW_EXIT(&vnetp->vsw_fp_rw);
868 	}
869 }
870 
871 /*
872  * Remove an entry from fdb.
873  */
874 static void
875 vnet_fdbe_del(vnet_t *vnetp, vnet_res_t *vresp)
876 {
877 	uint64_t	addr = 0;
878 	int		rv;
879 	uint32_t	refcnt;
880 	vnet_res_t	*tmp;
881 
882 	KEY_HASH(addr, vresp->rem_macaddr);
883 
884 	/*
885 	 * Remove the entry from fdb hash table.
886 	 * This prevents further references to this fdb entry.
887 	 */
888 	if (vresp->type != VIO_NET_RES_HYBRID) {
889 		rv = mod_hash_remove(vnetp->fdb_hashp, (mod_hash_key_t)addr,
890 		    (mod_hash_val_t *)&tmp);
891 		if (rv != 0) {
892 			/*
893 			 * As the resources are added to the hash only
894 			 * after they are started, this can occur if
895 			 * a resource unregisters before it is ever started.
896 			 */
897 			return;
898 		}
899 	}
900 
901 	if (vresp->type == VIO_NET_RES_LDC_SERVICE) {
902 		WRITE_ENTER(&vnetp->vsw_fp_rw);
903 
904 		ASSERT(tmp == vnetp->vsw_fp);
905 		vnetp->vsw_fp = NULL;
906 
907 		RW_EXIT(&vnetp->vsw_fp_rw);
908 	} else if (vresp->type == VIO_NET_RES_HYBRID) {
909 		WRITE_ENTER(&vnetp->vsw_fp_rw);
910 
911 		vnetp->hio_fp = NULL;
912 
913 		RW_EXIT(&vnetp->vsw_fp_rw);
914 	}
915 
916 	/*
917 	 * If there are threads already ref holding before the entry was
918 	 * removed from hash table, then wait for ref count to drop to zero.
919 	 */
920 	(vresp->type == VIO_NET_RES_LDC_SERVICE) ?
921 	    (refcnt = 1) : (refcnt = 0);
922 	while (vresp->refcnt > refcnt) {
923 		delay(drv_usectohz(vnet_fdbe_refcnt_delay));
924 	}
925 }
926 
927 /*
928  * Search fdb for a given mac address. If an entry is found, hold
929  * a reference to it and return the entry; else returns NULL.
930  */
931 static vnet_res_t *
932 vnet_fdbe_find(vnet_t *vnetp, struct ether_addr *addrp)
933 {
934 	uint64_t	key = 0;
935 	vnet_res_t	*vresp;
936 	int		rv;
937 
938 	KEY_HASH(key, addrp->ether_addr_octet);
939 
940 	rv = mod_hash_find_cb(vnetp->fdb_hashp, (mod_hash_key_t)key,
941 	    (mod_hash_val_t *)&vresp, vnet_fdbe_find_cb);
942 
943 	if (rv != 0)
944 		return (NULL);
945 
946 	return (vresp);
947 }
948 
949 /*
950  * Callback function provided to mod_hash_find_cb(). After finding the fdb
951  * entry corresponding to the key (macaddr), this callback will be invoked by
952  * mod_hash_find_cb() to atomically increment the reference count on the fdb
953  * entry before returning the found entry.
954  */
955 static void
956 vnet_fdbe_find_cb(mod_hash_key_t key, mod_hash_val_t val)
957 {
958 	_NOTE(ARGUNUSED(key))
959 	VNET_FDBE_REFHOLD((vnet_res_t *)val);
960 }
961 
962 static void
963 vnet_rx(vio_net_handle_t vrh, mblk_t *mp)
964 {
965 	vnet_res_t *vresp = (vnet_res_t *)vrh;
966 	vnet_t *vnetp = vresp->vnetp;
967 
968 	if ((vnetp != NULL) && (vnetp->mh)) {
969 		mac_rx(vnetp->mh, NULL, mp);
970 	} else {
971 		freemsgchain(mp);
972 	}
973 }
974 
975 void
976 vnet_tx_update(vio_net_handle_t vrh)
977 {
978 	vnet_res_t *vresp = (vnet_res_t *)vrh;
979 	vnet_t *vnetp = vresp->vnetp;
980 
981 	if ((vnetp != NULL) && (vnetp->mh != NULL)) {
982 		mac_tx_update(vnetp->mh);
983 	}
984 }
985 
986 /*
987  * Update the new mtu of vnet into the mac layer. First check if the device has
988  * been plumbed and if so fail the mtu update. Returns 0 on success.
989  */
990 int
991 vnet_mtu_update(vnet_t *vnetp, uint32_t mtu)
992 {
993 	int	rv;
994 
995 	if (vnetp == NULL || vnetp->mh == NULL) {
996 		return (EINVAL);
997 	}
998 
999 	WRITE_ENTER(&vnetp->vrwlock);
1000 
1001 	if (vnetp->flags & VNET_STARTED) {
1002 		RW_EXIT(&vnetp->vrwlock);
1003 		cmn_err(CE_NOTE, "!vnet%d: Unable to process mtu "
1004 		    "update as the device is plumbed\n",
1005 		    vnetp->instance);
1006 		return (EBUSY);
1007 	}
1008 
1009 	/* update mtu in the mac layer */
1010 	rv = mac_maxsdu_update(vnetp->mh, mtu);
1011 	if (rv != 0) {
1012 		RW_EXIT(&vnetp->vrwlock);
1013 		cmn_err(CE_NOTE,
1014 		    "!vnet%d: Unable to update mtu with mac layer\n",
1015 		    vnetp->instance);
1016 		return (EIO);
1017 	}
1018 
1019 	vnetp->mtu = mtu;
1020 
1021 	RW_EXIT(&vnetp->vrwlock);
1022 
1023 	return (0);
1024 }
1025 
1026 /*
1027  * vio_net_resource_reg -- An interface called to register a resource
1028  *	with vnet.
1029  *	macp -- a GLDv3 mac_register that has all the details of
1030  *		a resource and its callbacks etc.
1031  *	type -- resource type.
1032  *	local_macaddr -- resource's MAC address. This is used to
1033  *			 associate a resource with a corresponding vnet.
1034  *	remote_macaddr -- remote side MAC address. This is ignored for
1035  *			  the Hybrid resources.
1036  *	vhp -- A handle returned to the caller.
1037  *	vcb -- A set of callbacks provided to the callers.
1038  */
1039 int vio_net_resource_reg(mac_register_t *macp, vio_net_res_type_t type,
1040     ether_addr_t local_macaddr, ether_addr_t rem_macaddr, vio_net_handle_t *vhp,
1041     vio_net_callbacks_t *vcb)
1042 {
1043 	vnet_t	*vnetp;
1044 	vnet_res_t *vresp;
1045 
1046 	vresp = kmem_zalloc(sizeof (vnet_res_t), KM_SLEEP);
1047 	ether_copy(local_macaddr, vresp->local_macaddr);
1048 	ether_copy(rem_macaddr, vresp->rem_macaddr);
1049 	vresp->type = type;
1050 	bcopy(macp, &vresp->macreg, sizeof (mac_register_t));
1051 
1052 	DBG1(NULL, "Resource Registerig type=0%X\n", type);
1053 
1054 	READ_ENTER(&vnet_rw);
1055 	vnetp = vnet_headp;
1056 	while (vnetp != NULL) {
1057 		if (VNET_MATCH_RES(vresp, vnetp)) {
1058 			WRITE_ENTER(&vnetp->vrwlock);
1059 			vresp->vnetp = vnetp;
1060 			vresp->nextp = vnetp->vres_list;
1061 			vnetp->vres_list = vresp;
1062 			RW_EXIT(&vnetp->vrwlock);
1063 			break;
1064 		}
1065 		vnetp = vnetp->nextp;
1066 	}
1067 	RW_EXIT(&vnet_rw);
1068 	if (vresp->vnetp == NULL) {
1069 		DWARN(NULL, "No vnet instance");
1070 		kmem_free(vresp, sizeof (vnet_res_t));
1071 		return (ENXIO);
1072 	}
1073 
1074 	*vhp = vresp;
1075 	vcb->vio_net_rx_cb = vnet_rx;
1076 	vcb->vio_net_tx_update = vnet_tx_update;
1077 	vcb->vio_net_report_err = vnet_handle_res_err;
1078 
1079 	/* Dispatch a task to start resources */
1080 	vnet_dispatch_res_task(vnetp);
1081 	return (0);
1082 }
1083 
1084 /*
1085  * vio_net_resource_unreg -- An interface to unregister a resource.
1086  */
1087 void
1088 vio_net_resource_unreg(vio_net_handle_t vhp)
1089 {
1090 	vnet_res_t *vresp = (vnet_res_t *)vhp;
1091 	vnet_t *vnetp = vresp->vnetp;
1092 	vnet_res_t *vrp;
1093 
1094 	DBG1(NULL, "Resource Registerig hdl=0x%p", vhp);
1095 
1096 	ASSERT(vnetp != NULL);
1097 	vnet_fdbe_del(vnetp, vresp);
1098 
1099 	WRITE_ENTER(&vnetp->vrwlock);
1100 	if (vresp == vnetp->vres_list) {
1101 		vnetp->vres_list = vresp->nextp;
1102 	} else {
1103 		vrp = vnetp->vres_list;
1104 		while (vrp->nextp != NULL) {
1105 			if (vrp->nextp == vresp) {
1106 				vrp->nextp = vresp->nextp;
1107 				break;
1108 			}
1109 			vrp = vrp->nextp;
1110 		}
1111 	}
1112 	vresp->vnetp = NULL;
1113 	vresp->nextp = NULL;
1114 	RW_EXIT(&vnetp->vrwlock);
1115 	KMEM_FREE(vresp);
1116 }
1117 
1118 /*
1119  * vnet_dds_rx -- an interface called by vgen to DDS messages.
1120  */
1121 void
1122 vnet_dds_rx(void *arg, void *dmsg)
1123 {
1124 	vnet_t *vnetp = arg;
1125 	vdds_process_dds_msg(vnetp, dmsg);
1126 }
1127 
1128 /*
1129  * vnet_send_dds_msg -- An interface provided to DDS to send
1130  *	DDS messages. This simply sends meessages via vgen.
1131  */
1132 int
1133 vnet_send_dds_msg(vnet_t *vnetp, void *dmsg)
1134 {
1135 	int rv;
1136 
1137 	if (vnetp->vgenhdl != NULL) {
1138 		rv = vgen_dds_tx(vnetp->vgenhdl, dmsg);
1139 	}
1140 	return (rv);
1141 }
1142 
1143 /*
1144  * vnet_handle_res_err -- A callback function called by a resource
1145  *	to report an error. For example, vgen can call to report
1146  *	an LDC down/reset event. This will trigger cleanup of associated
1147  *	Hybrid resource.
1148  */
1149 /* ARGSUSED */
1150 static void
1151 vnet_handle_res_err(vio_net_handle_t vrh, vio_net_err_val_t err)
1152 {
1153 	vnet_res_t *vresp = (vnet_res_t *)vrh;
1154 	vnet_t *vnetp = vresp->vnetp;
1155 
1156 	if (vnetp == NULL) {
1157 		return;
1158 	}
1159 	if ((vresp->type != VIO_NET_RES_LDC_SERVICE) &&
1160 	    (vresp->type != VIO_NET_RES_HYBRID)) {
1161 		return;
1162 	}
1163 	vdds_cleanup_hybrid_res(vnetp);
1164 }
1165 
1166 /*
1167  * vnet_dispatch_res_task -- A function to dispatch tasks start resources.
1168  */
1169 static void
1170 vnet_dispatch_res_task(vnet_t *vnetp)
1171 {
1172 	int rv;
1173 
1174 	WRITE_ENTER(&vnetp->vrwlock);
1175 	if (vnetp->flags & VNET_STARTED) {
1176 		rv = ddi_taskq_dispatch(vnetp->taskqp, vnet_res_start_task,
1177 		    vnetp, DDI_NOSLEEP);
1178 		if (rv != DDI_SUCCESS) {
1179 			cmn_err(CE_WARN,
1180 			    "vnet%d:Can't dispatch start resource task",
1181 			    vnetp->instance);
1182 		}
1183 	}
1184 	RW_EXIT(&vnetp->vrwlock);
1185 }
1186 
1187 /*
1188  * vnet_res_start_task -- A taskq callback function that starts a resource.
1189  */
1190 static void
1191 vnet_res_start_task(void *arg)
1192 {
1193 	vnet_t *vnetp = arg;
1194 
1195 	WRITE_ENTER(&vnetp->vrwlock);
1196 	if (vnetp->flags & VNET_STARTED) {
1197 		vnet_start_resources(vnetp);
1198 	}
1199 	RW_EXIT(&vnetp->vrwlock);
1200 }
1201 
1202 /*
1203  * vnet_start_resources -- starts all resources associated with
1204  *	a vnet.
1205  */
1206 static void
1207 vnet_start_resources(vnet_t *vnetp)
1208 {
1209 	mac_register_t	*macp;
1210 	mac_callbacks_t	*cbp;
1211 	vnet_res_t	*vresp;
1212 	int rv;
1213 
1214 	DBG1(vnetp, "enter\n");
1215 
1216 	for (vresp = vnetp->vres_list; vresp != NULL; vresp = vresp->nextp) {
1217 		/* skip if it is already started */
1218 		if (vresp->flags & VNET_STARTED) {
1219 			continue;
1220 		}
1221 		macp = &vresp->macreg;
1222 		cbp = macp->m_callbacks;
1223 		rv = cbp->mc_start(macp->m_driver);
1224 		if (rv == 0) {
1225 			/*
1226 			 * Successfully started the resource, so now
1227 			 * add it to the fdb.
1228 			 */
1229 			vresp->flags |= VNET_STARTED;
1230 			vnet_fdbe_add(vnetp, vresp);
1231 		}
1232 	}
1233 
1234 	DBG1(vnetp, "exit\n");
1235 
1236 }
1237 
1238 /*
1239  * vnet_stop_resources -- stop all resources associated with a vnet.
1240  */
1241 static void
1242 vnet_stop_resources(vnet_t *vnetp)
1243 {
1244 	vnet_res_t	*vresp;
1245 	vnet_res_t	*nvresp;
1246 	mac_register_t	*macp;
1247 	mac_callbacks_t	*cbp;
1248 
1249 	DBG1(vnetp, "enter\n");
1250 
1251 	for (vresp = vnetp->vres_list; vresp != NULL; ) {
1252 		nvresp = vresp->nextp;
1253 		if (vresp->flags & VNET_STARTED) {
1254 			macp = &vresp->macreg;
1255 			cbp = macp->m_callbacks;
1256 			cbp->mc_stop(macp->m_driver);
1257 			vresp->flags &= ~VNET_STARTED;
1258 		}
1259 		vresp = nvresp;
1260 	}
1261 	DBG1(vnetp, "exit\n");
1262 }
1263