xref: /titanic_50/usr/src/uts/common/io/vnic/vnic_dev.c (revision c869993e79c1eafbec61a56bf6cea848fe754c71)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include <sys/types.h>
29 #include <sys/sysmacros.h>
30 #include <sys/conf.h>
31 #include <sys/cmn_err.h>
32 #include <sys/list.h>
33 #include <sys/ksynch.h>
34 #include <sys/kmem.h>
35 #include <sys/stream.h>
36 #include <sys/modctl.h>
37 #include <sys/ddi.h>
38 #include <sys/sunddi.h>
39 #include <sys/atomic.h>
40 #include <sys/stat.h>
41 #include <sys/modhash.h>
42 #include <sys/strsubr.h>
43 #include <sys/strsun.h>
44 #include <sys/dlpi.h>
45 #include <sys/mac.h>
46 #include <sys/mac_ether.h>
47 #include <sys/pattr.h>
48 #if 0
49 #include <sys/vlan.h>
50 #endif
51 #include <sys/vnic.h>
52 #include <sys/vnic_impl.h>
53 #include <sys/gld.h>
54 #include <inet/ip.h>
55 #include <inet/ip_impl.h>
56 
57 static int vnic_m_start(void *);
58 static void vnic_m_stop(void *);
59 static int vnic_m_promisc(void *, boolean_t);
60 static int vnic_m_multicst(void *, boolean_t, const uint8_t *);
61 static int vnic_m_unicst(void *, const uint8_t *);
62 static int vnic_m_stat(void *, uint_t, uint64_t *);
63 static void vnic_m_resources(void *);
64 static mblk_t *vnic_m_tx(void *, mblk_t *);
65 static boolean_t vnic_m_capab_get(void *, mac_capab_t, void *);
66 static void vnic_mac_free(vnic_mac_t *);
67 static uint_t vnic_info_walker(mod_hash_key_t, mod_hash_val_t *, void *);
68 static void vnic_notify_cb(void *, mac_notify_type_t);
69 static int vnic_modify_mac_addr(vnic_t *, uint_t, uchar_t *);
70 static mblk_t *vnic_active_tx(void *, mblk_t *);
71 static int vnic_promisc_set(vnic_t *, boolean_t);
72 
/* kmem caches backing vnic_t and vnic_mac_t allocations */
static kmem_cache_t	*vnic_cache;
static kmem_cache_t	*vnic_mac_cache;
/* taken as writer by VNIC create/modify/delete, as reader by the stat code */
static krwlock_t	vnic_lock;
/* held around vnic_mac_hash lookups/updates and vnic_mac_t ref counting */
static kmutex_t		vnic_mac_lock;
/* number of VNICs currently registered; updated by create and delete */
static uint_t		vnic_count;

/* hash of VNICs (vnic_t's), keyed by VNIC id */
static mod_hash_t	*vnic_hash;
#define	VNIC_HASHSZ	64
#define	VNIC_HASH_KEY(vnic_id)	((mod_hash_key_t)(uintptr_t)vnic_id)

/*
 * Hash of underlying open MACs (vnic_mac_t's), keyed by the string
 * "<device name><instance number>/<port number>".
 */
static mod_hash_t	*vnic_mac_hash;
#define	VNIC_MAC_HASHSZ	64
90 
/*
 * Take a reference on a vnic_mac_t. The caller must hold vnic_mac_lock.
 * The trailing ASSERT catches the counter wrapping around to zero.
 */
#define	VNIC_MAC_REFHOLD(va) {			\
	ASSERT(MUTEX_HELD(&vnic_mac_lock));	\
	(va)->va_refs++;			\
	ASSERT((va)->va_refs != 0);		\
}
96 
/*
 * Drop a reference on a vnic_mac_t. The caller must hold vnic_mac_lock.
 * When the last reference goes away the vnic_mac_t is torn down by
 * vnic_mac_free(), so (va) must not be used after this macro.
 */
#define	VNIC_MAC_REFRELE(va) {			\
	ASSERT(MUTEX_HELD(&vnic_mac_lock));	\
	ASSERT((va)->va_refs != 0);		\
	if (--((va)->va_refs) == 0)		\
		vnic_mac_free(va);		\
}
103 
/* all-ones address used to add/remove each VNIC's broadcast membership */
static uchar_t vnic_brdcst_mac[] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
105 
/*
 * Walker state; presumably the argument handed to vnic_info_walker()
 * (the walker body is not visible here -- confirm field usage there).
 */
typedef struct vnic_info_state {
	uint32_t	vs_vnic_id;
	char		vs_dev_name[MAXNAMELEN];
	boolean_t	vs_vnic_found;
	vnic_info_new_vnic_fn_t	vs_new_vnic_fn;
	void		*vs_fn_arg;
	int		vs_rc;
} vnic_info_state_t;
115 
/* flags stored in the first slot of vnic_m_callbacks */
#define	VNIC_M_CALLBACK_FLAGS	(MC_RESOURCES | MC_GETCAPAB)

/*
 * Callback vector installed as m_callbacks on the mac_register_t that
 * vnic_dev_create() passes to mac_register().
 */
static mac_callbacks_t vnic_m_callbacks = {
	VNIC_M_CALLBACK_FLAGS,
	vnic_m_stat,
	vnic_m_start,
	vnic_m_stop,
	vnic_m_promisc,
	vnic_m_multicst,
	vnic_m_unicst,
	vnic_m_tx,
	vnic_m_resources,
	NULL,			/* m_ioctl */
	vnic_m_capab_get
};
131 
132 /* ARGSUSED */
133 static int
134 vnic_mac_ctor(void *buf, void *arg, int kmflag)
135 {
136 	vnic_mac_t *vnic_mac = buf;
137 
138 	bzero(vnic_mac, sizeof (vnic_mac_t));
139 	rw_init(&vnic_mac->va_bcast_grp_lock, NULL, RW_DRIVER, NULL);
140 	rw_init(&vnic_mac->va_promisc_lock, NULL, RW_DRIVER, NULL);
141 
142 	return (0);
143 }
144 
145 /* ARGSUSED */
146 static void
147 vnic_mac_dtor(void *buf, void *arg)
148 {
149 	vnic_mac_t *vnic_mac = buf;
150 
151 	rw_destroy(&vnic_mac->va_promisc_lock);
152 	rw_destroy(&vnic_mac->va_bcast_grp_lock);
153 }
154 
155 void
156 vnic_dev_init(void)
157 {
158 	vnic_cache = kmem_cache_create("vnic_cache",
159 	    sizeof (vnic_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
160 
161 	vnic_mac_cache = kmem_cache_create("vnic_mac_cache",
162 	    sizeof (vnic_mac_t), 0, vnic_mac_ctor, vnic_mac_dtor,
163 	    NULL, NULL, NULL, 0);
164 
165 	vnic_hash = mod_hash_create_idhash("vnic_hash",
166 	    VNIC_HASHSZ, mod_hash_null_valdtor);
167 
168 	vnic_mac_hash = mod_hash_create_strhash("vnic_mac_hash",
169 	    VNIC_MAC_HASHSZ, mod_hash_null_valdtor);
170 
171 	rw_init(&vnic_lock, NULL, RW_DEFAULT, NULL);
172 
173 	mutex_init(&vnic_mac_lock, NULL, MUTEX_DEFAULT, NULL);
174 
175 	vnic_count = 0;
176 }
177 
178 void
179 vnic_dev_fini(void)
180 {
181 	ASSERT(vnic_count == 0);
182 
183 	mutex_destroy(&vnic_mac_lock);
184 	rw_destroy(&vnic_lock);
185 	mod_hash_destroy_strhash(vnic_mac_hash);
186 	mod_hash_destroy_idhash(vnic_hash);
187 	kmem_cache_destroy(vnic_mac_cache);
188 	kmem_cache_destroy(vnic_cache);
189 }
190 
191 uint_t
192 vnic_dev_count(void)
193 {
194 	return (vnic_count);
195 }
196 
197 static int
198 vnic_mac_open(const char *dev_name, vnic_mac_t **vmp)
199 {
200 	char *str_key;
201 	int err;
202 	vnic_mac_t *vnic_mac = NULL;
203 	const mac_info_t *mip;
204 
205 	*vmp = NULL;
206 
207 	mutex_enter(&vnic_mac_lock);
208 
209 	err = mod_hash_find(vnic_mac_hash, (mod_hash_key_t)dev_name,
210 	    (mod_hash_val_t *)&vnic_mac);
211 	if (err == 0) {
212 		/* this MAC is already opened, increment reference count */
213 		VNIC_MAC_REFHOLD(vnic_mac);
214 		mutex_exit(&vnic_mac_lock);
215 		*vmp = vnic_mac;
216 		return (0);
217 	}
218 
219 	vnic_mac = kmem_cache_alloc(vnic_mac_cache, KM_SLEEP);
220 
221 	if ((err = mac_open(dev_name, &vnic_mac->va_mh)) != 0) {
222 		vnic_mac->va_mh = NULL;
223 		goto bail;
224 	}
225 
226 	/* only ethernet support, for now */
227 	mip = mac_info(vnic_mac->va_mh);
228 	if (mip->mi_media != DL_ETHER) {
229 		err = ENOTSUP;
230 		goto bail;
231 	}
232 	if (mip->mi_media != mip->mi_nativemedia) {
233 		err = ENOTSUP;
234 		goto bail;
235 	}
236 
237 	(void) strcpy(vnic_mac->va_dev_name, dev_name);
238 
239 	/* add entry to hash table */
240 	str_key = kmem_alloc(strlen(dev_name) + 1, KM_SLEEP);
241 	(void) strcpy(str_key, dev_name);
242 	err = mod_hash_insert(vnic_mac_hash, (mod_hash_key_t)str_key,
243 	    (mod_hash_val_t)vnic_mac);
244 	ASSERT(err == 0);
245 
246 	/* initialize the flow table associated with lower MAC */
247 	vnic_mac->va_addr_len = ETHERADDRL;
248 	(void) vnic_classifier_flow_tab_init(vnic_mac, vnic_mac->va_addr_len,
249 	    KM_SLEEP);
250 
251 	vnic_mac->va_txinfo = mac_vnic_tx_get(vnic_mac->va_mh);
252 	vnic_mac->va_notify_hdl = mac_notify_add(vnic_mac->va_mh,
253 	    vnic_notify_cb, vnic_mac);
254 
255 	VNIC_MAC_REFHOLD(vnic_mac);
256 	*vmp = vnic_mac;
257 	mutex_exit(&vnic_mac_lock);
258 	return (0);
259 
260 bail:
261 	if (vnic_mac != NULL) {
262 		if (vnic_mac->va_mh != NULL)
263 			mac_close(vnic_mac->va_mh);
264 		kmem_cache_free(vnic_mac_cache, vnic_mac);
265 	}
266 	mutex_exit(&vnic_mac_lock);
267 	return (err);
268 }
269 
270 /*
271  * Create a new flow for the active MAC client sharing the NIC
272  * with the VNICs. This allows the unicast packets for that NIC
273  * to be classified and passed up to the active MAC client. It
274  * also allows packets sent from a VNIC to the active link to
275  * be classified by the VNIC transmit function and delivered via
276  * the MAC module locally. Returns B_TRUE on success, B_FALSE on
277  * failure.
278  */
279 static int
280 vnic_init_active_rx(vnic_mac_t *vnic_mac)
281 {
282 	uchar_t nic_mac_addr[MAXMACADDRLEN];
283 
284 	if (vnic_mac->va_active_flow != NULL)
285 		return (B_TRUE);
286 
287 	mac_unicst_get(vnic_mac->va_mh, nic_mac_addr);
288 
289 	vnic_mac->va_active_flow = vnic_classifier_flow_create(
290 	    vnic_mac->va_addr_len, nic_mac_addr, NULL, B_TRUE, KM_SLEEP);
291 
292 	vnic_classifier_flow_add(vnic_mac, vnic_mac->va_active_flow,
293 	    (vnic_rx_fn_t)mac_active_rx, vnic_mac->va_mh, NULL);
294 	return (B_TRUE);
295 }
296 
297 static void
298 vnic_fini_active_rx(vnic_mac_t *vnic_mac)
299 {
300 	if (vnic_mac->va_active_flow == NULL)
301 		return;
302 
303 	vnic_classifier_flow_remove(vnic_mac, vnic_mac->va_active_flow);
304 	vnic_classifier_flow_destroy(vnic_mac->va_active_flow);
305 	vnic_mac->va_active_flow = NULL;
306 }
307 
308 static void
309 vnic_update_active_rx(vnic_mac_t *vnic_mac)
310 {
311 	if (vnic_mac->va_active_flow == NULL)
312 		return;
313 
314 	vnic_fini_active_rx(vnic_mac);
315 	(void) vnic_init_active_rx(vnic_mac);
316 }
317 
318 /*
319  * Copy an mblk, preserving its hardware checksum flags.
320  */
321 mblk_t *
322 vnic_copymsg_cksum(mblk_t *mp)
323 {
324 	mblk_t *mp1;
325 	uint32_t start, stuff, end, value, flags;
326 
327 	mp1 = copymsg(mp);
328 	if (mp1 == NULL)
329 		return (NULL);
330 
331 	hcksum_retrieve(mp, NULL, NULL, &start, &stuff, &end, &value, &flags);
332 	(void) hcksum_assoc(mp1, NULL, NULL, start, stuff, end, value,
333 	    flags, KM_NOSLEEP);
334 
335 	return (mp1);
336 }
337 
338 /*
339  * Copy an mblk chain, presenting the hardware checksum flags of the
340  * individual mblks.
341  */
342 mblk_t *
343 vnic_copymsgchain_cksum(mblk_t *mp)
344 {
345 	mblk_t *nmp = NULL;
346 	mblk_t **nmpp = &nmp;
347 
348 	for (; mp != NULL; mp = mp->b_next) {
349 		if ((*nmpp = vnic_copymsg_cksum(mp)) == NULL) {
350 			freemsgchain(nmp);
351 			return (NULL);
352 		}
353 
354 		nmpp = &((*nmpp)->b_next);
355 	}
356 
357 	return (nmp);
358 }
359 
360 
361 /*
362  * Process the specified mblk chain for proper handling of hardware
363  * checksum offload. This routine is invoked for loopback VNIC traffic.
364  * The function handles a NULL mblk chain passed as argument.
365  */
366 mblk_t *
367 vnic_fix_cksum(mblk_t *mp_chain)
368 {
369 	mblk_t *mp, *prev = NULL, *new_chain = mp_chain, *mp1;
370 	uint32_t flags, start, stuff, end, value;
371 
372 	for (mp = mp_chain; mp != NULL; prev = mp, mp = mp->b_next) {
373 		uint16_t len;
374 		uint32_t offset;
375 		struct ether_header *ehp;
376 		uint16_t sap;
377 
378 		hcksum_retrieve(mp, NULL, NULL, &start, &stuff, &end, &value,
379 		    &flags);
380 		if (flags == 0)
381 			continue;
382 
383 		/*
384 		 * Since the processing of checksum offload for loopback
385 		 * traffic requires modification of the packet contents,
386 		 * ensure sure that we are always modifying our own copy.
387 		 */
388 		if (DB_REF(mp) > 1) {
389 			mp1 = copymsg(mp);
390 			if (mp1 == NULL)
391 				continue;
392 			mp1->b_next = mp->b_next;
393 			mp->b_next = NULL;
394 			freemsg(mp);
395 			if (prev != NULL)
396 				prev->b_next = mp1;
397 			else
398 				new_chain = mp1;
399 			mp = mp1;
400 		}
401 
402 		/*
403 		 * Ethernet, and optionally VLAN header.
404 		 */
405 		/*LINTED*/
406 		ehp = (struct ether_header *)mp->b_rptr;
407 		if (ntohs(ehp->ether_type) == VLAN_TPID) {
408 			struct ether_vlan_header *evhp;
409 
410 			ASSERT(MBLKL(mp) >=
411 			    sizeof (struct ether_vlan_header));
412 			/*LINTED*/
413 			evhp = (struct ether_vlan_header *)mp->b_rptr;
414 			sap = ntohs(evhp->ether_type);
415 			offset = sizeof (struct ether_vlan_header);
416 		} else {
417 			sap = ntohs(ehp->ether_type);
418 			offset = sizeof (struct ether_header);
419 		}
420 
421 		if (MBLKL(mp) <= offset) {
422 			offset -= MBLKL(mp);
423 			if (mp->b_cont == NULL) {
424 				/* corrupted packet, skip it */
425 				if (prev != NULL)
426 					prev->b_next = mp->b_next;
427 				else
428 					new_chain = mp->b_next;
429 				mp1 = mp->b_next;
430 				mp->b_next = NULL;
431 				freemsg(mp);
432 				mp = mp1;
433 				continue;
434 			}
435 			mp = mp->b_cont;
436 		}
437 
438 		if (flags & (HCK_FULLCKSUM | HCK_IPV4_HDRCKSUM)) {
439 			ipha_t *ipha = NULL;
440 
441 			/*
442 			 * In order to compute the full and header
443 			 * checksums, we need to find and parse
444 			 * the IP and/or ULP headers.
445 			 */
446 
447 			sap = (sap < ETHERTYPE_802_MIN) ? 0 : sap;
448 
449 			/*
450 			 * IP header.
451 			 */
452 			if (sap != ETHERTYPE_IP)
453 				continue;
454 
455 			ASSERT(MBLKL(mp) >= offset + sizeof (ipha_t));
456 			/*LINTED*/
457 			ipha = (ipha_t *)(mp->b_rptr + offset);
458 
459 			if (flags & HCK_FULLCKSUM) {
460 				ipaddr_t src, dst;
461 				uint32_t cksum;
462 				uint16_t *up;
463 				uint8_t proto;
464 
465 				/*
466 				 * Pointer to checksum field in ULP header.
467 				 */
468 				proto = ipha->ipha_protocol;
469 				ASSERT(ipha->ipha_version_and_hdr_length ==
470 				    IP_SIMPLE_HDR_VERSION);
471 				if (proto == IPPROTO_TCP) {
472 					/*LINTED*/
473 					up = IPH_TCPH_CHECKSUMP(ipha,
474 					    IP_SIMPLE_HDR_LENGTH);
475 				} else {
476 					ASSERT(proto == IPPROTO_UDP);
477 					/*LINTED*/
478 					up = IPH_UDPH_CHECKSUMP(ipha,
479 					    IP_SIMPLE_HDR_LENGTH);
480 				}
481 
482 				/*
483 				 * Pseudo-header checksum.
484 				 */
485 				src = ipha->ipha_src;
486 				dst = ipha->ipha_dst;
487 				len = ntohs(ipha->ipha_length) -
488 				    IP_SIMPLE_HDR_LENGTH;
489 
490 				cksum = (dst >> 16) + (dst & 0xFFFF) +
491 				    (src >> 16) + (src & 0xFFFF);
492 				cksum += htons(len);
493 
494 				/*
495 				 * The checksum value stored in the packet needs
496 				 * to be correct. Compute it here.
497 				 */
498 				*up = 0;
499 				cksum += (((proto) == IPPROTO_UDP) ?
500 				    IP_UDP_CSUM_COMP : IP_TCP_CSUM_COMP);
501 				cksum = IP_CSUM(mp, IP_SIMPLE_HDR_LENGTH +
502 				    offset, cksum);
503 				*(up) = (uint16_t)(cksum ? cksum : ~cksum);
504 
505 				flags |= HCK_FULLCKSUM_OK;
506 				value = 0xffff;
507 			}
508 
509 			if (flags & HCK_IPV4_HDRCKSUM) {
510 				ASSERT(ipha != NULL);
511 				ipha->ipha_hdr_checksum =
512 				    (uint16_t)ip_csum_hdr(ipha);
513 			}
514 		}
515 
516 		if (flags & HCK_PARTIALCKSUM) {
517 			uint16_t *up, partial, cksum;
518 			uchar_t *ipp; /* ptr to beginning of IP header */
519 
520 			if (mp->b_cont != NULL) {
521 				mblk_t *mp1;
522 
523 				mp1 = msgpullup(mp, offset + end);
524 				if (mp1 == NULL)
525 					continue;
526 				mp1->b_next = mp->b_next;
527 				mp->b_next = NULL;
528 				freemsg(mp);
529 				if (prev != NULL)
530 					prev->b_next = mp1;
531 				else
532 					new_chain = mp1;
533 				mp = mp1;
534 			}
535 
536 			ipp = mp->b_rptr + offset;
537 			/*LINTED*/
538 			up = (uint16_t *)((uchar_t *)ipp + stuff);
539 			partial = *up;
540 			*up = 0;
541 
542 			cksum = IP_BCSUM_PARTIAL(mp->b_rptr + offset + start,
543 			    end - start, partial);
544 			cksum = ~cksum;
545 			*up = cksum ? cksum : ~cksum;
546 
547 			/*
548 			 * Since we already computed the whole checksum,
549 			 * indicate to the stack that it has already
550 			 * been verified by the hardware.
551 			 */
552 			flags &= ~HCK_PARTIALCKSUM;
553 			flags |= (HCK_FULLCKSUM | HCK_FULLCKSUM_OK);
554 			value = 0xffff;
555 		}
556 
557 		(void) hcksum_assoc(mp, NULL, NULL, start, stuff, end,
558 		    value, flags, KM_NOSLEEP);
559 	}
560 
561 	return (new_chain);
562 }
563 
564 static void
565 vnic_mac_close(vnic_mac_t *vnic_mac)
566 {
567 	mutex_enter(&vnic_mac_lock);
568 	VNIC_MAC_REFRELE(vnic_mac);
569 	mutex_exit(&vnic_mac_lock);
570 }
571 
572 static void
573 vnic_mac_free(vnic_mac_t *vnic_mac)
574 {
575 	mod_hash_val_t val;
576 
577 	ASSERT(MUTEX_HELD(&vnic_mac_lock));
578 	vnic_fini_active_rx(vnic_mac);
579 	mac_notify_remove(vnic_mac->va_mh, vnic_mac->va_notify_hdl);
580 	if (vnic_mac->va_mac_set) {
581 		vnic_mac->va_mac_set = B_FALSE;
582 		mac_vnic_clear(vnic_mac->va_mh);
583 	}
584 	vnic_classifier_flow_tab_fini(vnic_mac);
585 	mac_close(vnic_mac->va_mh);
586 
587 	(void) mod_hash_remove(vnic_mac_hash,
588 	    (mod_hash_key_t)vnic_mac->va_dev_name, &val);
589 	ASSERT(vnic_mac == (vnic_mac_t *)val);
590 
591 	kmem_cache_free(vnic_mac_cache, vnic_mac);
592 }
593 
594 /*
595  * Initial VNIC receive routine. Invoked for packets that are steered
596  * to a VNIC but the VNIC has not been started yet.
597  */
598 /* ARGSUSED */
599 static void
600 vnic_rx_initial(void *arg1, void *arg2, mblk_t *mp_chain)
601 {
602 	vnic_t *vnic = arg1;
603 	mblk_t *mp;
604 
605 	/* update stats */
606 	for (mp = mp_chain; mp != NULL; mp = mp->b_next)
607 		vnic->vn_stat_ierrors++;
608 	freemsgchain(mp_chain);
609 }
610 
611 /*
612  * VNIC receive routine invoked after the classifier for the VNIC
613  * has been initialized and the VNIC has been started.
614  */
615 /* ARGSUSED */
616 void
617 vnic_rx(void *arg1, void *arg2, mblk_t *mp_chain)
618 {
619 	vnic_t *vnic = arg1;
620 	mblk_t *mp;
621 
622 	/* update stats */
623 	for (mp = mp_chain; mp != NULL; mp = mp->b_next) {
624 		vnic->vn_stat_ipackets++;
625 		vnic->vn_stat_rbytes += msgdsize(mp);
626 	}
627 
628 	/* pass packet up */
629 	mac_rx(vnic->vn_mh, NULL, mp_chain);
630 }
631 
632 /*
633  * Routine to create a MAC-based VNIC. Adds the passed MAC address
634  * to an unused slot in the NIC if one is available. Otherwise it
635  * sets the NIC in promiscuous mode and assigns the MAC address to
636  * a Rx ring if available or a soft ring.
637  */
638 static int
639 vnic_add_unicstaddr(vnic_t *vnic, mac_multi_addr_t *maddr)
640 {
641 	vnic_mac_t *vnic_mac = vnic->vn_vnic_mac;
642 	int err;
643 
644 	if (mac_unicst_verify(vnic_mac->va_mh, maddr->mma_addr,
645 	    maddr->mma_addrlen) == B_FALSE)
646 		return (EINVAL);
647 
648 	if (mac_vnic_capab_get(vnic_mac->va_mh, MAC_CAPAB_MULTIADDRESS,
649 	    &(vnic->vn_mma_capab))) {
650 		if (vnic->vn_maddr_naddrfree == 0) {
651 			/*
652 			 * No free address slots available.
653 			 * Enable promiscuous mode.
654 			 */
655 			goto set_promisc;
656 		}
657 
658 		err = vnic->vn_maddr_add(vnic->vn_maddr_handle, maddr);
659 		if (err != 0) {
660 			if (err == ENOSPC) {
661 				/*
662 				 * There was a race to add addresses
663 				 * with other multiple address consumers,
664 				 * and we lost out. Use promisc mode.
665 				 */
666 				goto set_promisc;
667 			}
668 
669 			return (err);
670 		}
671 
672 		vnic->vn_slot_id = maddr->mma_slot;
673 		vnic->vn_multi_mac = B_TRUE;
674 	} else {
675 		/*
676 		 * Either multiple MAC address support is not
677 		 * available or all available addresses have
678 		 * been used up.
679 		 */
680 	set_promisc:
681 		err = mac_promisc_set(vnic_mac->va_mh, B_TRUE, MAC_DEVPROMISC);
682 		if (err != 0) {
683 			return (err);
684 		}
685 
686 		vnic->vn_promisc_mac = B_TRUE;
687 	}
688 	return (err);
689 }
690 
691 /*
692  * VNIC is getting deleted. Remove the MAC address from the slot.
693  * If promiscuous mode was being used, then unset the promiscuous mode.
694  */
695 static int
696 vnic_remove_unicstaddr(vnic_t *vnic)
697 {
698 	vnic_mac_t *vnic_mac = vnic->vn_vnic_mac;
699 	int err;
700 
701 	if (vnic->vn_multi_mac) {
702 		ASSERT(vnic->vn_promisc_mac == B_FALSE);
703 		err = vnic->vn_maddr_remove(vnic->vn_maddr_handle,
704 		    vnic->vn_slot_id);
705 		vnic->vn_multi_mac = B_FALSE;
706 	}
707 
708 	if (vnic->vn_promisc_mac) {
709 		ASSERT(vnic->vn_multi_mac == B_FALSE);
710 		err = mac_promisc_set(vnic_mac->va_mh, B_FALSE, MAC_DEVPROMISC);
711 		vnic->vn_promisc_mac = B_FALSE;
712 	}
713 
714 	return (err);
715 }
716 
717 /*
718  * Create a new VNIC upon request from administrator.
719  * Returns 0 on success, an errno on failure.
720  */
721 int
722 vnic_dev_create(uint_t vnic_id, char *dev_name, int mac_len, uchar_t *mac_addr)
723 {
724 	vnic_t *vnic = NULL;
725 	mac_register_t *mac;
726 	int err;
727 	vnic_mac_t *vnic_mac;
728 	const mac_info_t *lower_mac_info;
729 	mac_multi_addr_t maddr;
730 	mac_txinfo_t tx_info;
731 
732 	if (mac_len != ETHERADDRL) {
733 		/* currently only ethernet NICs are supported */
734 		return (EINVAL);
735 	}
736 
737 	rw_enter(&vnic_lock, RW_WRITER);
738 
739 	/* does a VNIC with the same id already exist? */
740 	err = mod_hash_find(vnic_hash, VNIC_HASH_KEY(vnic_id),
741 	    (mod_hash_val_t *)&vnic);
742 	if (err == 0) {
743 		rw_exit(&vnic_lock);
744 		return (EEXIST);
745 	}
746 
747 	vnic = kmem_cache_alloc(vnic_cache, KM_NOSLEEP);
748 	if (vnic == NULL) {
749 		rw_exit(&vnic_lock);
750 		return (ENOMEM);
751 	}
752 
753 	/* open underlying MAC */
754 	err = vnic_mac_open(dev_name, &vnic_mac);
755 	if (err != 0) {
756 		kmem_cache_free(vnic_cache, vnic);
757 		rw_exit(&vnic_lock);
758 		return (err);
759 	}
760 
761 	bzero(vnic, sizeof (*vnic));
762 	vnic->vn_id = vnic_id;
763 	vnic->vn_vnic_mac = vnic_mac;
764 
765 	vnic->vn_started = B_FALSE;
766 	vnic->vn_promisc = B_FALSE;
767 	vnic->vn_multi_mac = B_FALSE;
768 	vnic->vn_bcast_grp = B_FALSE;
769 
770 	/* set the VNIC MAC address */
771 	maddr.mma_addrlen = mac_len;
772 	maddr.mma_slot = 0;
773 	maddr.mma_flags = 0;
774 	bcopy(mac_addr, maddr.mma_addr, mac_len);
775 	if ((err = vnic_add_unicstaddr(vnic, &maddr)) != 0)
776 		goto bail;
777 	bcopy(mac_addr, vnic->vn_addr, mac_len);
778 
779 	/* set the initial VNIC capabilities */
780 	if (!mac_vnic_capab_get(vnic_mac->va_mh, MAC_CAPAB_HCKSUM,
781 	    &vnic->vn_hcksum_txflags))
782 		vnic->vn_hcksum_txflags = 0;
783 
784 	/* register with the MAC module */
785 	if ((mac = mac_alloc(MAC_VERSION)) == NULL)
786 		goto bail;
787 
788 	mac->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
789 	mac->m_driver = vnic;
790 	mac->m_dip = vnic_get_dip();
791 	mac->m_instance = vnic_id;
792 	mac->m_src_addr = vnic->vn_addr;
793 	mac->m_callbacks = &vnic_m_callbacks;
794 
795 	lower_mac_info = mac_info(vnic_mac->va_mh);
796 	mac->m_min_sdu = lower_mac_info->mi_sdu_min;
797 	mac->m_max_sdu = lower_mac_info->mi_sdu_max;
798 
799 	err = mac_register(mac, &vnic->vn_mh);
800 	mac_free(mac);
801 	if (err != 0)
802 		goto bail;
803 
804 	/* add new VNIC to hash table */
805 	err = mod_hash_insert(vnic_hash, VNIC_HASH_KEY(vnic_id),
806 	    (mod_hash_val_t)vnic);
807 	ASSERT(err == 0);
808 	vnic_count++;
809 
810 	rw_exit(&vnic_lock);
811 
812 	/* Create a flow, initialized with the MAC address of the VNIC */
813 	if ((vnic->vn_flow_ent = vnic_classifier_flow_create(mac_len, mac_addr,
814 	    NULL, B_FALSE, KM_SLEEP)) == NULL) {
815 		(void) vnic_dev_delete(vnic_id);
816 		vnic = NULL;
817 		err = ENOMEM;
818 		goto bail_unlocked;
819 	}
820 
821 	vnic_classifier_flow_add(vnic_mac, vnic->vn_flow_ent, vnic_rx_initial,
822 	    vnic, vnic);
823 
824 	/* setup VNIC to receive broadcast packets */
825 	err = vnic_bcast_add(vnic, vnic_brdcst_mac, MAC_ADDRTYPE_BROADCAST);
826 	if (err != 0) {
827 		(void) vnic_dev_delete(vnic_id);
828 		vnic = NULL;
829 		goto bail_unlocked;
830 	}
831 	vnic->vn_bcast_grp = B_TRUE;
832 
833 	mutex_enter(&vnic_mac_lock);
834 	if (!vnic_mac->va_mac_set) {
835 		/*
836 		 * We want to MAC layer to call the VNIC tx outbound
837 		 * routine, so that local broadcast packets sent by
838 		 * the active interface sharing the underlying NIC (if
839 		 * any), can be broadcast to every VNIC.
840 		 */
841 		tx_info.mt_fn = vnic_active_tx;
842 		tx_info.mt_arg = vnic_mac;
843 		if (!mac_vnic_set(vnic_mac->va_mh, &tx_info,
844 		    vnic_m_capab_get, vnic)) {
845 			mutex_exit(&vnic_mac_lock);
846 			(void) vnic_dev_delete(vnic_id);
847 			vnic = NULL;
848 			err = EBUSY;
849 			goto bail_unlocked;
850 		}
851 		vnic_mac->va_mac_set = B_TRUE;
852 	}
853 	mutex_exit(&vnic_mac_lock);
854 
855 	/* allow passing packets to NIC's active MAC client */
856 	if (!vnic_init_active_rx(vnic_mac)) {
857 		(void) vnic_dev_delete(vnic_id);
858 		vnic = NULL;
859 		err = ENOMEM;
860 		goto bail_unlocked;
861 	}
862 
863 	return (0);
864 
865 bail:
866 	(void) vnic_remove_unicstaddr(vnic);
867 	vnic_mac_close(vnic_mac);
868 	rw_exit(&vnic_lock);
869 
870 bail_unlocked:
871 	if (vnic != NULL) {
872 		kmem_cache_free(vnic_cache, vnic);
873 	}
874 
875 	return (err);
876 }
877 
878 /*
879  * Modify the properties of an existing VNIC.
880  */
881 /* ARGSUSED */
882 int
883 vnic_dev_modify(uint_t vnic_id, uint_t modify_mask,
884     vnic_mac_addr_type_t mac_addr_type, uint_t mac_len, uchar_t *mac_addr)
885 {
886 	vnic_t *vnic = NULL;
887 	int rv = 0;
888 	boolean_t notify_mac_addr = B_FALSE;
889 
890 	rw_enter(&vnic_lock, RW_WRITER);
891 
892 	if (mod_hash_find(vnic_hash, VNIC_HASH_KEY(vnic_id),
893 	    (mod_hash_val_t *)&vnic) != 0) {
894 		rw_exit(&vnic_lock);
895 		return (ENOENT);
896 	}
897 
898 	if (modify_mask & VNIC_IOC_MODIFY_ADDR) {
899 		rv = vnic_modify_mac_addr(vnic, mac_len, mac_addr);
900 		if (rv == 0)
901 			notify_mac_addr = B_TRUE;
902 	}
903 
904 	rw_exit(&vnic_lock);
905 
906 	if (notify_mac_addr)
907 		mac_unicst_update(vnic->vn_mh, mac_addr);
908 
909 	return (rv);
910 }
911 
912 int
913 vnic_dev_delete(uint_t vnic_id)
914 {
915 	vnic_t *vnic = NULL;
916 	mod_hash_val_t val;
917 	vnic_flow_t *flent;
918 	int rc;
919 	vnic_mac_t *vnic_mac;
920 
921 	rw_enter(&vnic_lock, RW_WRITER);
922 
923 	if (mod_hash_find(vnic_hash, VNIC_HASH_KEY(vnic_id),
924 	    (mod_hash_val_t *)&vnic) != 0) {
925 		rw_exit(&vnic_lock);
926 		return (ENOENT);
927 	}
928 
929 	/*
930 	 * We cannot unregister the MAC yet. Unregistering would
931 	 * free up mac_impl_t which should not happen at this time.
932 	 * Packets could be entering vnic_rx() through the
933 	 * flow entry and so mac_impl_t cannot be NULL. So disable
934 	 * mac_impl_t by calling mac_disable(). This will prevent any
935 	 * new claims on mac_impl_t.
936 	 */
937 	if (mac_disable(vnic->vn_mh) != 0) {
938 		rw_exit(&vnic_lock);
939 		return (EBUSY);
940 	}
941 
942 	(void) mod_hash_remove(vnic_hash, VNIC_HASH_KEY(vnic_id), &val);
943 	ASSERT(vnic == (vnic_t *)val);
944 
945 	if (vnic->vn_bcast_grp)
946 		(void) vnic_bcast_delete(vnic, vnic_brdcst_mac);
947 
948 	flent = vnic->vn_flow_ent;
949 	if (flent != NULL) {
950 		/*
951 		 * vnic_classifier_flow_destroy() ensures that the
952 		 * flow is no longer used.
953 		 */
954 		vnic_classifier_flow_remove(vnic->vn_vnic_mac, flent);
955 		vnic_classifier_flow_destroy(flent);
956 	}
957 
958 	rc = mac_unregister(vnic->vn_mh);
959 	ASSERT(rc == 0);
960 	(void) vnic_remove_unicstaddr(vnic);
961 	vnic_mac = vnic->vn_vnic_mac;
962 	kmem_cache_free(vnic_cache, vnic);
963 	vnic_count--;
964 	rw_exit(&vnic_lock);
965 	vnic_mac_close(vnic_mac);
966 	return (0);
967 }
968 
969 /*
970  * For the specified packet chain, return a sub-chain to be sent
971  * and the transmit function to be used to send the packet. Also
972  * return a pointer to the sub-chain of packets that should
973  * be re-classified. If the function returns NULL, the packet
974  * should be sent using the underlying NIC.
975  */
976 static vnic_flow_t *
977 vnic_classify(vnic_mac_t *vnic_mac, mblk_t *mp, mblk_t **mp_chain_rest)
978 {
979 	vnic_flow_t *flow_ent;
980 
981 	/* one packet at a time */
982 	*mp_chain_rest = mp->b_next;
983 	mp->b_next = NULL;
984 
985 	/* do classification on the packet */
986 	flow_ent = vnic_classifier_get_flow(vnic_mac, mp);
987 
988 	return (flow_ent);
989 }
990 
991 /*
992  * Send a packet chain to a local VNIC or an active MAC client.
993  */
994 static void
995 vnic_local_tx(vnic_mac_t *vnic_mac, vnic_flow_t *flow_ent, mblk_t *mp_chain)
996 {
997 	mblk_t *mp1;
998 	const vnic_flow_fn_info_t *fn_info;
999 	vnic_t *vnic;
1000 
1001 	if (!vnic_classifier_is_active(flow_ent) &&
1002 	    mac_promisc_get(vnic_mac->va_mh, MAC_PROMISC)) {
1003 		/*
1004 		 * If the MAC is in promiscous mode,
1005 		 * send a copy of the active client.
1006 		 */
1007 		if ((mp1 = vnic_copymsgchain_cksum(mp_chain)) == NULL)
1008 			goto sendit;
1009 		if ((mp1 = vnic_fix_cksum(mp1)) == NULL)
1010 			goto sendit;
1011 		mac_active_rx(vnic_mac->va_mh, NULL, mp1);
1012 	}
1013 sendit:
1014 	fn_info = vnic_classifier_get_fn_info(flow_ent);
1015 	/*
1016 	 * If the vnic to which we would deliver this packet is in
1017 	 * promiscuous mode then it already received the packet via
1018 	 * vnic_promisc_rx().
1019 	 *
1020 	 * XXX assumes that ff_arg2 is a vnic_t pointer if it is
1021 	 * non-NULL (currently always true).
1022 	 */
1023 	vnic = (vnic_t *)fn_info->ff_arg2;
1024 	if ((vnic != NULL) && vnic->vn_promisc)
1025 		freemsg(mp_chain);
1026 	else if ((mp1 = vnic_fix_cksum(mp_chain)) != NULL)
1027 		(fn_info->ff_fn)(fn_info->ff_arg1, fn_info->ff_arg2, mp1);
1028 }
1029 
1030 /*
1031  * This function is invoked when a MAC client needs to send a packet
1032  * to a NIC which is shared by VNICs. It is passed to the MAC layer
1033  * by a call to mac_vnic_set() when the NIC is opened, and is returned
1034  * to MAC clients by mac_tx_get() when VNICs are present.
1035  */
1036 mblk_t *
1037 vnic_active_tx(void *arg, mblk_t *mp_chain)
1038 {
1039 	vnic_mac_t *vnic_mac = arg;
1040 	mblk_t *mp, *extra_mp = NULL;
1041 	vnic_flow_t *flow_ent;
1042 	void *flow_cookie;
1043 	const mac_txinfo_t *mtp = vnic_mac->va_txinfo;
1044 
1045 	for (mp = mp_chain; mp != NULL; mp = extra_mp) {
1046 		mblk_t *next;
1047 
1048 		next = mp->b_next;
1049 		mp->b_next = NULL;
1050 
1051 		vnic_promisc_rx(vnic_mac, (vnic_t *)-1, mp);
1052 
1053 		flow_ent = vnic_classify(vnic_mac, mp, &extra_mp);
1054 		ASSERT(extra_mp == NULL);
1055 		extra_mp = next;
1056 
1057 		if (flow_ent != NULL) {
1058 			flow_cookie = vnic_classifier_get_client_cookie(
1059 			    flow_ent);
1060 			if (flow_cookie != NULL) {
1061 				/*
1062 				 * Send a copy to every VNIC defined on the
1063 				 * interface, as well as the underlying MAC.
1064 				 */
1065 				vnic_bcast_send(flow_cookie, (vnic_t *)-1, mp);
1066 			} else {
1067 				/*
1068 				 * loopback the packet to a local VNIC or
1069 				 * an active MAC client.
1070 				 */
1071 				vnic_local_tx(vnic_mac, flow_ent, mp);
1072 			}
1073 			VNIC_FLOW_REFRELE(flow_ent);
1074 			mp_chain = NULL;
1075 		} else {
1076 			/*
1077 			 * Non-VNIC destination, send via the underlying
1078 			 * NIC. In order to avoid a recursive call
1079 			 * to this function, we ensured that mtp points
1080 			 * to the unerlying NIC transmit function
1081 			 * by inilizating through mac_vnic_tx_get().
1082 			 */
1083 			mp_chain = mtp->mt_fn(mtp->mt_arg, mp);
1084 			if (mp_chain != NULL)
1085 				break;
1086 		}
1087 	}
1088 
1089 	if ((mp_chain != NULL) && (extra_mp != NULL)) {
1090 		ASSERT(mp_chain->b_next == NULL);
1091 		mp_chain->b_next = extra_mp;
1092 	}
1093 	return (mp_chain);
1094 }
1095 
1096 /*
1097  * VNIC transmit function.
1098  */
1099 mblk_t *
1100 vnic_m_tx(void *arg, mblk_t *mp_chain)
1101 {
1102 	vnic_t *vnic = arg;
1103 	vnic_mac_t *vnic_mac = vnic->vn_vnic_mac;
1104 	mblk_t *mp, *extra_mp = NULL;
1105 	vnic_flow_t *flow_ent;
1106 	void *flow_cookie;
1107 
1108 	/*
1109 	 * Update stats.
1110 	 */
1111 	for (mp = mp_chain; mp != NULL; mp = mp->b_next) {
1112 		vnic->vn_stat_opackets++;
1113 		vnic->vn_stat_obytes += msgdsize(mp);
1114 	}
1115 
1116 	for (mp = mp_chain; mp != NULL; mp = extra_mp) {
1117 		mblk_t *next;
1118 
1119 		next = mp->b_next;
1120 		mp->b_next = NULL;
1121 
1122 		vnic_promisc_rx(vnic->vn_vnic_mac, vnic, mp);
1123 
1124 		flow_ent = vnic_classify(vnic->vn_vnic_mac, mp, &extra_mp);
1125 		ASSERT(extra_mp == NULL);
1126 		extra_mp = next;
1127 
1128 		if (flow_ent != NULL) {
1129 			flow_cookie = vnic_classifier_get_client_cookie(
1130 			    flow_ent);
1131 			if (flow_cookie != NULL) {
1132 				/*
1133 				 * The vnic_bcast_send function expects
1134 				 * to receive the sender VNIC as value
1135 				 * for arg2.
1136 				 */
1137 				vnic_bcast_send(flow_cookie, vnic, mp);
1138 			} else {
1139 				/*
1140 				 * loopback the packet to a local VNIC or
1141 				 * an active MAC client.
1142 				 */
1143 				vnic_local_tx(vnic_mac, flow_ent, mp);
1144 			}
1145 			VNIC_FLOW_REFRELE(flow_ent);
1146 			mp_chain = NULL;
1147 		} else {
1148 			/*
1149 			 * Non-local destination, send via the underlying
1150 			 * NIC.
1151 			 */
1152 			const mac_txinfo_t *mtp = vnic->vn_txinfo;
1153 			mp_chain = mtp->mt_fn(mtp->mt_arg, mp);
1154 			if (mp_chain != NULL)
1155 				break;
1156 		}
1157 	}
1158 
1159 	/* update stats to account for unsent packets */
1160 	for (mp = mp_chain; mp != NULL; mp = mp->b_next) {
1161 		vnic->vn_stat_opackets--;
1162 		vnic->vn_stat_obytes -= msgdsize(mp);
1163 		vnic->vn_stat_oerrors++;
1164 		/*
1165 		 * link back in the last portion not counted due to bandwidth
1166 		 * control.
1167 		 */
1168 		if (mp->b_next == NULL) {
1169 			mp->b_next = extra_mp;
1170 			break;
1171 		}
1172 	}
1173 
1174 	return (mp_chain);
1175 }
1176 
/*
 * Resource advertisement entry point.  A VNIC has nothing to advertise,
 * so this is intentionally a no-op.
 */
/* ARGSUSED */
static void
vnic_m_resources(void *arg)
{
}
1183 
1184 static int
1185 vnic_m_stat(void *arg, uint_t stat, uint64_t *val)
1186 {
1187 	vnic_t *vnic = arg;
1188 	int rval = 0;
1189 
1190 	rw_enter(&vnic_lock, RW_READER);
1191 
1192 	switch (stat) {
1193 	case ETHER_STAT_LINK_DUPLEX:
1194 		*val = mac_stat_get(vnic->vn_vnic_mac->va_mh,
1195 		    ETHER_STAT_LINK_DUPLEX);
1196 		break;
1197 	case MAC_STAT_IFSPEED:
1198 		*val = mac_stat_get(vnic->vn_vnic_mac->va_mh,
1199 		    MAC_STAT_IFSPEED);
1200 		break;
1201 	case MAC_STAT_MULTIRCV:
1202 		*val = vnic->vn_stat_multircv;
1203 		break;
1204 	case MAC_STAT_BRDCSTRCV:
1205 		*val = vnic->vn_stat_brdcstrcv;
1206 		break;
1207 	case MAC_STAT_MULTIXMT:
1208 		*val = vnic->vn_stat_multixmt;
1209 		break;
1210 	case MAC_STAT_BRDCSTXMT:
1211 		*val = vnic->vn_stat_brdcstxmt;
1212 		break;
1213 	case MAC_STAT_IERRORS:
1214 		*val = vnic->vn_stat_ierrors;
1215 		break;
1216 	case MAC_STAT_OERRORS:
1217 		*val = vnic->vn_stat_oerrors;
1218 		break;
1219 	case MAC_STAT_RBYTES:
1220 		*val = vnic->vn_stat_rbytes;
1221 		break;
1222 	case MAC_STAT_IPACKETS:
1223 		*val = vnic->vn_stat_ipackets;
1224 		break;
1225 	case MAC_STAT_OBYTES:
1226 		*val = vnic->vn_stat_obytes;
1227 		break;
1228 	case MAC_STAT_OPACKETS:
1229 		*val = vnic->vn_stat_opackets;
1230 		break;
1231 	default:
1232 		rval = ENOTSUP;
1233 	}
1234 
1235 	rw_exit(&vnic_lock);
1236 	return (rval);
1237 }
1238 
1239 /*
1240  * Return information about the specified capability.
1241  */
1242 /* ARGSUSED */
1243 static boolean_t
1244 vnic_m_capab_get(void *arg, mac_capab_t cap, void *cap_data)
1245 {
1246 	vnic_t *vnic = arg;
1247 
1248 	switch (cap) {
1249 	case MAC_CAPAB_POLL:
1250 		return (B_TRUE);
1251 	case MAC_CAPAB_HCKSUM: {
1252 		uint32_t *hcksum_txflags = cap_data;
1253 
1254 		*hcksum_txflags = vnic->vn_hcksum_txflags &
1255 		    (HCKSUM_INET_FULL_V4 | HCKSUM_IPHDRCKSUM |
1256 		    HCKSUM_INET_PARTIAL);
1257 		break;
1258 	}
1259 	default:
1260 		return (B_FALSE);
1261 	}
1262 	return (B_TRUE);
1263 }
1264 
1265 static int
1266 vnic_m_start(void *arg)
1267 {
1268 	vnic_t *vnic = arg;
1269 	mac_handle_t lower_mh = vnic->vn_vnic_mac->va_mh;
1270 	int rc;
1271 
1272 	rc = mac_start(lower_mh);
1273 	if (rc != 0)
1274 		return (rc);
1275 
1276 	vnic_classifier_flow_update_fn(vnic->vn_flow_ent, vnic_rx, vnic, vnic);
1277 	return (0);
1278 }
1279 
1280 static void
1281 vnic_m_stop(void *arg)
1282 {
1283 	vnic_t *vnic = arg;
1284 	mac_handle_t lower_mh = vnic->vn_vnic_mac->va_mh;
1285 
1286 	vnic_classifier_flow_update_fn(vnic->vn_flow_ent, vnic_rx_initial,
1287 	    vnic, vnic);
1288 	mac_stop(lower_mh);
1289 }
1290 
1291 /* ARGSUSED */
1292 static int
1293 vnic_m_promisc(void *arg, boolean_t on)
1294 {
1295 	vnic_t *vnic = arg;
1296 
1297 	return (vnic_promisc_set(vnic, on));
1298 }
1299 
1300 static int
1301 vnic_m_multicst(void *arg, boolean_t add, const uint8_t *addrp)
1302 {
1303 	vnic_t *vnic = arg;
1304 	int rc = 0;
1305 
1306 	if (add)
1307 		rc = vnic_bcast_add(vnic, addrp, MAC_ADDRTYPE_MULTICAST);
1308 	else
1309 		vnic_bcast_delete(vnic, addrp);
1310 
1311 	return (rc);
1312 }
1313 
1314 static int
1315 vnic_m_unicst(void *arg, const uint8_t *mac_addr)
1316 {
1317 	vnic_t *vnic = arg;
1318 	vnic_mac_t *vnic_mac = vnic->vn_vnic_mac;
1319 	int rv;
1320 
1321 	rw_enter(&vnic_lock, RW_WRITER);
1322 	rv = vnic_modify_mac_addr(vnic, vnic_mac->va_addr_len,
1323 	    (uchar_t *)mac_addr);
1324 	rw_exit(&vnic_lock);
1325 
1326 	if (rv == 0)
1327 		mac_unicst_update(vnic->vn_mh, mac_addr);
1328 	return (0);
1329 }
1330 
1331 int
1332 vnic_info(uint_t *nvnics, uint32_t vnic_id, char *dev_name, void *fn_arg,
1333     vnic_info_new_vnic_fn_t new_vnic_fn)
1334 {
1335 	vnic_info_state_t state;
1336 	int rc = 0;
1337 
1338 	rw_enter(&vnic_lock, RW_READER);
1339 
1340 	*nvnics = vnic_count;
1341 
1342 	bzero(&state, sizeof (state));
1343 	state.vs_vnic_id = vnic_id;
1344 	bcopy(state.vs_dev_name, dev_name, MAXNAMELEN);
1345 	state.vs_new_vnic_fn = new_vnic_fn;
1346 	state.vs_fn_arg = fn_arg;
1347 
1348 	mod_hash_walk(vnic_hash, vnic_info_walker, &state);
1349 
1350 	if ((rc = state.vs_rc) == 0 && vnic_id != 0 &&
1351 	    !state.vs_vnic_found)
1352 		rc = ENOENT;
1353 
1354 	rw_exit(&vnic_lock);
1355 	return (rc);
1356 }
1357 
1358 /*
1359  * Walker invoked when building a list of vnics that must be passed
1360  * up to user space.
1361  */
1362 /*ARGSUSED*/
1363 static uint_t
1364 vnic_info_walker(mod_hash_key_t key, mod_hash_val_t *val, void *arg)
1365 {
1366 	vnic_t *vnic;
1367 	vnic_info_state_t *state = arg;
1368 
1369 	if (state->vs_rc != 0)
1370 		return (MH_WALK_TERMINATE);	/* terminate walk */
1371 
1372 	vnic = (vnic_t *)val;
1373 
1374 	if (state->vs_vnic_id != 0 && vnic->vn_id != state->vs_vnic_id)
1375 		goto bail;
1376 
1377 	state->vs_vnic_found = B_TRUE;
1378 
1379 	state->vs_rc = state->vs_new_vnic_fn(state->vs_fn_arg,
1380 	    vnic->vn_id, vnic->vn_addr_type, vnic->vn_vnic_mac->va_addr_len,
1381 	    vnic->vn_addr, vnic->vn_vnic_mac->va_dev_name);
1382 bail:
1383 	return ((state->vs_rc == 0) ? MH_WALK_CONTINUE : MH_WALK_TERMINATE);
1384 }
1385 
1386 /*
1387  * vnic_notify_cb() and vnic_notify_walker() below are used to
1388  * process events received from an underlying NIC and, if needed,
1389  * forward these events to the VNICs defined on top of that NIC.
1390  */
1391 
1392 typedef struct vnic_notify_state {
1393 	mac_notify_type_t	vo_type;
1394 	vnic_mac_t		*vo_vnic_mac;
1395 } vnic_notify_state_t;
1396 
1397 /* ARGSUSED */
1398 static uint_t
1399 vnic_notify_walker(mod_hash_key_t key, mod_hash_val_t *val, void *arg)
1400 {
1401 	vnic_t *vnic = (vnic_t *)val;
1402 	vnic_notify_state_t *state = arg;
1403 
1404 	/* ignore VNICs that don't use the specified underlying MAC */
1405 	if (vnic->vn_vnic_mac != state->vo_vnic_mac)
1406 		return (MH_WALK_CONTINUE);
1407 
1408 	switch (state->vo_type) {
1409 	case MAC_NOTE_TX:
1410 		mac_tx_update(vnic->vn_mh);
1411 		break;
1412 	case MAC_NOTE_LINK:
1413 		/*
1414 		 * The VNIC link state must be up regardless of
1415 		 * the link state of the underlying NIC to maintain
1416 		 * connectivity between VNICs on the same host.
1417 		 */
1418 		mac_link_update(vnic->vn_mh, LINK_STATE_UP);
1419 		break;
1420 	case MAC_NOTE_UNICST:
1421 		vnic_update_active_rx(vnic->vn_vnic_mac);
1422 		break;
1423 	case MAC_NOTE_VNIC:
1424 		/* only for clients which share a NIC with a VNIC */
1425 		break;
1426 	case MAC_NOTE_PROMISC:
1427 		mutex_enter(&vnic_mac_lock);
1428 		vnic->vn_vnic_mac->va_txinfo = mac_vnic_tx_get(
1429 		    vnic->vn_vnic_mac->va_mh);
1430 		mutex_exit(&vnic_mac_lock);
1431 		break;
1432 	}
1433 
1434 	return (MH_WALK_CONTINUE);
1435 }
1436 
1437 static void
1438 vnic_notify_cb(void *arg, mac_notify_type_t type)
1439 {
1440 	vnic_mac_t *vnic = arg;
1441 	vnic_notify_state_t state;
1442 
1443 	state.vo_type = type;
1444 	state.vo_vnic_mac = vnic;
1445 
1446 	rw_enter(&vnic_lock, RW_READER);
1447 	mod_hash_walk(vnic_hash, vnic_notify_walker, &state);
1448 	rw_exit(&vnic_lock);
1449 }
1450 
1451 static int
1452 vnic_modify_mac_addr(vnic_t *vnic, uint_t mac_len, uchar_t *mac_addr)
1453 {
1454 	vnic_mac_t *vnic_mac = vnic->vn_vnic_mac;
1455 	vnic_flow_t *vnic_flow = vnic->vn_flow_ent;
1456 
1457 	ASSERT(RW_WRITE_HELD(&vnic_lock));
1458 
1459 	if (mac_len != vnic_mac->va_addr_len)
1460 		return (EINVAL);
1461 
1462 	vnic_classifier_flow_update_addr(vnic_flow, mac_addr);
1463 	return (0);
1464 }
1465 
1466 static int
1467 vnic_promisc_set(vnic_t *vnic, boolean_t on)
1468 {
1469 	vnic_mac_t *vnic_mac = vnic->vn_vnic_mac;
1470 	int r = -1;
1471 
1472 	if (vnic->vn_promisc == on)
1473 		return (0);
1474 
1475 	if (on) {
1476 		r = mac_promisc_set(vnic_mac->va_mh, B_TRUE, MAC_DEVPROMISC);
1477 		if (r != 0)
1478 			return (r);
1479 
1480 		rw_enter(&vnic_mac->va_promisc_lock, RW_WRITER);
1481 		vnic->vn_promisc_next = vnic_mac->va_promisc;
1482 		vnic_mac->va_promisc = vnic;
1483 		vnic_mac->va_promisc_gen++;
1484 
1485 		vnic->vn_promisc = B_TRUE;
1486 		rw_exit(&vnic_mac->va_promisc_lock);
1487 
1488 		return (0);
1489 	} else {
1490 		vnic_t *loop, *prev = NULL;
1491 
1492 		rw_enter(&vnic_mac->va_promisc_lock, RW_WRITER);
1493 		loop = vnic_mac->va_promisc;
1494 
1495 		while ((loop != NULL) && (loop != vnic)) {
1496 			prev = loop;
1497 			loop = loop->vn_promisc_next;
1498 		}
1499 
1500 		if ((loop != NULL) &&
1501 		    ((r = mac_promisc_set(vnic_mac->va_mh, B_FALSE,
1502 		    MAC_DEVPROMISC)) == 0)) {
1503 			if (prev != NULL)
1504 				prev->vn_promisc_next = loop->vn_promisc_next;
1505 			else
1506 				vnic_mac->va_promisc = loop->vn_promisc_next;
1507 			vnic_mac->va_promisc_gen++;
1508 
1509 			vnic->vn_promisc = B_FALSE;
1510 		}
1511 		rw_exit(&vnic_mac->va_promisc_lock);
1512 
1513 		return (r);
1514 	}
1515 }
1516 
1517 void
1518 vnic_promisc_rx(vnic_mac_t *vnic_mac, vnic_t *sender, mblk_t *mp)
1519 {
1520 	vnic_t *loop;
1521 	vnic_flow_t *flow;
1522 	const vnic_flow_fn_info_t *fn_info;
1523 	mac_header_info_t hdr_info;
1524 	boolean_t dst_must_match = B_TRUE;
1525 
1526 	ASSERT(mp->b_next == NULL);
1527 
1528 	rw_enter(&vnic_mac->va_promisc_lock, RW_READER);
1529 	if (vnic_mac->va_promisc == NULL)
1530 		goto done;
1531 
1532 	if (mac_header_info(vnic_mac->va_mh, mp, &hdr_info) != 0)
1533 		goto done;
1534 
1535 	/*
1536 	 * If this is broadcast or multicast then the destination
1537 	 * address need not match for us to deliver it.
1538 	 */
1539 	if ((hdr_info.mhi_dsttype == MAC_ADDRTYPE_BROADCAST) ||
1540 	    (hdr_info.mhi_dsttype == MAC_ADDRTYPE_MULTICAST))
1541 		dst_must_match = B_FALSE;
1542 
1543 	for (loop = vnic_mac->va_promisc;
1544 	    loop != NULL;
1545 	    loop = loop->vn_promisc_next) {
1546 		if (loop == sender)
1547 			continue;
1548 
1549 		if (dst_must_match &&
1550 		    (bcmp(hdr_info.mhi_daddr, loop->vn_addr,
1551 		    sizeof (loop->vn_addr)) != 0))
1552 			continue;
1553 
1554 		flow = loop->vn_flow_ent;
1555 		ASSERT(flow != NULL);
1556 
1557 		if (!flow->vf_is_active) {
1558 			mblk_t *copy;
1559 			uint64_t gen;
1560 
1561 			if ((copy = vnic_copymsg_cksum(mp)) == NULL)
1562 				break;
1563 			if ((sender != NULL) &&
1564 			    ((copy = vnic_fix_cksum(copy)) == NULL))
1565 				break;
1566 
1567 			VNIC_FLOW_REFHOLD(flow);
1568 			gen = vnic_mac->va_promisc_gen;
1569 			rw_exit(&vnic_mac->va_promisc_lock);
1570 
1571 			fn_info = vnic_classifier_get_fn_info(flow);
1572 			(fn_info->ff_fn)(fn_info->ff_arg1,
1573 			    fn_info->ff_arg2, copy);
1574 
1575 			VNIC_FLOW_REFRELE(flow);
1576 			rw_enter(&vnic_mac->va_promisc_lock, RW_READER);
1577 			if (vnic_mac->va_promisc_gen != gen)
1578 				break;
1579 		}
1580 	}
1581 done:
1582 	rw_exit(&vnic_mac->va_promisc_lock);
1583 }
1584